blob: df20749072ce24822a6aa4df8201236191c909c5 [file] [log] [blame]
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "QuantizerStrategy.hpp"
#include "armnn/utility/PolymorphicDowncast.hpp"
namespace armnn
{
QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker,
const IQuantizationScheme* quantizationScheme,
bool preserveType)
: m_Ranges(rangeTracker)
, m_QuantizedNetwork(INetwork::Create())
, m_QuantizationScheme(quantizationScheme)
, m_PreserveType(preserveType)
{
}
void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
IConnectableLayer* quantizedLayer)
{
ARMNN_ASSERT(srcLayer);
for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
{
const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
const InputSlot* inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
ARMNN_ASSERT(inputSlot);
const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
ARMNN_ASSERT(outputSlot);
unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
Layer& layerToFind = outputSlot->GetOwningLayer();
auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
if (found == m_OriginalToQuantizedGuidMap.end())
{
// Error in graph traversal order
ARMNN_ASSERT_MSG(false, "Error in graph traversal");
return;
}
// Connect the slots in the quantized model
IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second];
IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i);
IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx);
newOutputSlot.Connect(newInputSlot);
TensorInfo info(outputSlot->GetTensorInfo());
// Only try to set quantization params on tensors that can be quantized
if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean &&
inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 &&
inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64)
{
// Fetch the min/max ranges that were computed earlier
auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
info.SetDataType(m_QuantizationScheme->GetDataType());
info.SetQuantizationOffset(qParams.second);
info.SetQuantizationScale(qParams.first);
}
newOutputSlot.SetTensorInfo(info);
}
}
ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer,
const ConstTensor& weights,
const Optional<ConstTensor>& biases,
std::vector<int32_t>& backing)
{
ARMNN_ASSERT(srcLayer);
const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
auto inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
ARMNN_ASSERT(inputSlot);
const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
ARMNN_ASSERT(outputSlot);
unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
Layer& layerToFind = outputSlot->GetOwningLayer();
auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
if (found == m_OriginalToQuantizedGuidMap.end())
{
// Error in graph traversal order
ARMNN_ASSERT_MSG(false, "Error in graph traversal");
return biases.value();
}
// Fetch the min/max ranges that were computed earlier
auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
// Get the quantization scale based on input and weight scale
float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
// Set up quantized bias tensor info and allocate space
TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
backing.resize(biases.value().GetInfo().GetNumElements());
// Convert values to int32
for (size_t i = 0; i < backing.size(); ++i)
{
float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
backing[i] = armnn::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
}
return ConstTensor(qInfo, backing);
}
void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
{
m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid()));
m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer));
}
void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer *layer,
const BaseDescriptor& descriptor,
const std::vector<armnn::ConstTensor> &constants,
const char *name,
const armnn::LayerBindingId id)
{
IgnoreUnused(id);
IConnectableLayer* newLayer;
switch (layer->GetType())
{
case armnn::LayerType::Addition :
{
newLayer = m_QuantizedNetwork->AddAdditionLayer(name);
break;
}
case armnn::LayerType::Activation :
{
const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name);
break;
}
case armnn::LayerType::ArgMinMax :
{
ArgMinMaxDescriptor argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name);
break;
}
case armnn::LayerType::BatchNormalization :
{
BatchNormalizationDescriptor batchNormalizationDescriptor =
static_cast<const BatchNormalizationDescriptor&>(descriptor);
std::vector<uint8_t> meanBacking;
ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking);
std::vector<uint8_t> varianceBacking;
ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking);
std::vector<uint8_t> betaBacking;
ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking);
std::vector<uint8_t> gammaBacking;
ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking);
newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor,
qMean,
qVariance,
qBeta,
qGamma,
name);
break;
}
case armnn::LayerType::BatchToSpaceNd :
{
BatchToSpaceNdDescriptor batchToSpaceNdDescriptor =
static_cast<const BatchToSpaceNdDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
break;
}
case armnn::LayerType::Comparison :
{
ComparisonDescriptor comparisonDescriptor =static_cast<const ComparisonDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name);
break;
}
case armnn::LayerType::Concat :
{
OriginsDescriptor originsDescriptor = static_cast<const OriginsDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name);
break;
}
case armnn::LayerType::Constant :
{
std::vector<uint8_t> inputBacking;
ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking);
newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name);
break;
}
case armnn::LayerType::Convolution2d :
{
const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
armnn::Optional<ConstTensor>{} :
armnn::Optional<ConstTensor>(constants[1]);
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
Optional<ConstTensor> optionalQBiases;
std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
Convolution2dDescriptor convolution2dDescriptor = static_cast<const Convolution2dDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor,
qWeights,
optionalQBiases,
name);
break;
}
case armnn::LayerType::DepthToSpace :
{
DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast<const DepthToSpaceDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
break;
}
case armnn::LayerType::DepthwiseConvolution2d :
{
DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor =
static_cast<const DepthwiseConvolution2dDescriptor&>(descriptor);
const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
armnn::Optional<ConstTensor>{} :
armnn::Optional<ConstTensor>(constants[1]);
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
Optional<ConstTensor> optionalQBiases;
std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(
depthwiseConvolution2dDescriptor,
qWeights,
optionalQBiases,
name);
break;
}
case armnn::LayerType::ElementwiseUnary :
{
ElementwiseUnaryDescriptor elementwiseUnaryDescriptor =
static_cast<const ElementwiseUnaryDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
break;
}
case armnn::LayerType::Fill :
{
FillDescriptor fillDescriptor = static_cast<const FillDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name);
break;
}
case armnn::LayerType::FullyConnected :
{
FullyConnectedDescriptor fullyConnectedDescriptor =
static_cast<const FullyConnectedDescriptor&>(descriptor);
const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
armnn::Optional<ConstTensor>{} :
armnn::Optional<ConstTensor>(constants[1]);
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
Optional<ConstTensor> optionalQBiases;
std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor,
qWeights,
optionalQBiases,
name);
break;
}
case armnn::LayerType::Input :
{
const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name);
if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
{
IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer();
inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0));
inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo());
RecordLayer(layer, quantizeLayer);
return;
}
else
{
RecordLayer(layer, inputLayer);
return;
}
}
case armnn::LayerType::InstanceNormalization :
{
InstanceNormalizationDescriptor instanceNormalizationDescriptor =
static_cast<const InstanceNormalizationDescriptor&>(descriptor);
newLayer =
m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name);
break;
}
case armnn::LayerType::LogSoftmax :
{
LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast<const LogSoftmaxDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
break;
}
case armnn::LayerType::Mean :
{
MeanDescriptor meanDescriptor = static_cast<const MeanDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name);
break;
}
case armnn::LayerType::Multiplication :
{
newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name);
break;
}
case armnn::LayerType::Normalization :
{
NormalizationDescriptor normalizationDescriptor = static_cast<const NormalizationDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name);
break;
}
case armnn::LayerType::Output :
{
const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
const DataType& dataType = info.GetDataType();
newLayer = m_QuantizedNetwork->AddOutputLayer(id, name);
if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
{
IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer();
RecordLayer(layer, dequantizeLayer);
SetQuantizedInputConnections(layer, dequantizeLayer);
dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0));
dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info);
return;
}
else
{
break;
}
}
case armnn::LayerType::Pad :
{
PadDescriptor padDescriptor = static_cast<const PadDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name);
break;
}
case armnn::LayerType::Permute :
{
PermuteDescriptor permuteDescriptor = static_cast<const PermuteDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name);
break;
}
case armnn::LayerType::Pooling2d :
{
Pooling2dDescriptor pooling2dDescriptor = static_cast<const Pooling2dDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name);
break;
}
case armnn::LayerType::Prelu :
{
newLayer = m_QuantizedNetwork->AddPreluLayer(name);
break;
}
case armnn::LayerType::Reshape :
{
ReshapeDescriptor reshapeDescriptor = static_cast<const ReshapeDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name);
break;
}
case armnn::LayerType::Resize :
{
ResizeBilinearDescriptor resizeBilinearDescriptor =
static_cast<const ResizeBilinearDescriptor&>(descriptor);
ResizeDescriptor resizeDescriptor;
resizeDescriptor.m_Method = ResizeMethod::Bilinear;
resizeDescriptor.m_TargetWidth = resizeBilinearDescriptor.m_TargetWidth;
resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight;
resizeDescriptor.m_DataLayout = resizeBilinearDescriptor.m_DataLayout;
newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name);
break;
}
case armnn::LayerType::Slice :
{
SliceDescriptor sliceDescriptor = static_cast<const SliceDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name);
break;
}
case armnn::LayerType::Softmax :
{
SoftmaxDescriptor softmaxDescriptor = static_cast<const SoftmaxDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name);
break;
}
case armnn::LayerType::SpaceToBatchNd :
{
SpaceToBatchNdDescriptor spaceToBatchNdDescriptor =
static_cast<const SpaceToBatchNdDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
break;
}
case armnn::LayerType::SpaceToDepth :
{
SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast<const SpaceToDepthDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
break;
}
case armnn::LayerType::Splitter :
{
SplitterDescriptor splitterDescriptor = static_cast<const SplitterDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name);
break;
}
case armnn::LayerType::Stack :
{
StackDescriptor stackDescriptor = static_cast<const StackDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name);
break;
}
case armnn::LayerType::StridedSlice :
{
StridedSliceDescriptor stridedSliceDescriptor = static_cast<const StridedSliceDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name);
break;
}
case armnn::LayerType::Subtraction :
{
newLayer = m_QuantizedNetwork->AddSubtractionLayer( name);
break;
}
case armnn::LayerType::TransposeConvolution2d :
{
const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
armnn::Optional<ConstTensor>{} :
armnn::Optional<ConstTensor>(constants[1]);
// quantize weights
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
// quantize biases
std::vector<int32_t> biasesBacking;
Optional<ConstTensor> optionalQBiases;
if (biases.has_value())
{
ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
TransposeConvolution2dDescriptor transposeConvolution2dDescriptor =
static_cast<const TransposeConvolution2dDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor,
qWeights,
optionalQBiases,
name);
break;
}
case armnn::LayerType::Transpose :
{
TransposeDescriptor transposeDescriptor = static_cast<const TransposeDescriptor&>(descriptor);
newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name);
break;
}
default:
{
throw UnimplementedException("Unimplemented layer encountered");
}
}
RecordLayer(layer, newLayer);
SetQuantizedInputConnections(layer, newLayer);
}
}