//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "DynamicQuantizationStrategy.hpp"
#include "NetworkUtils.hpp"
#include <armnn/Descriptors.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/Types.hpp>
#include <limits>
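
// Rough usage sketch (an assumption about the surrounding workflow; the driver code shown
// here is illustrative only and does not live in this file):
//
//     RangeTracker ranges;
//     Graph& graph = ...;                      // graph to be dynamically quantized
//     DynamicQuantizationStrategy strategy(ranges, graph);
//     ...                                      // traverse the graph so that ExecuteStrategy()
//                                              // below is called once per layer to seed ranges
//     strategy.FinishStrategy();               // insert DebugLayers after the calibrated layers
//     ...                                      // run representative inferences to refine ranges
//     strategy.VisitNonCalibratedLayers();     // remove DebugLayers and forward parent ranges
//                                              // onto the non-calibrated layers
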
namespace armnn
{
DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph)
    : m_RangeTracker(rangeTracker),
      m_Graph(graph)
{}

void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
{
    m_RangeTracker.SetRange(layer, outputIdx, min, max);
}
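
// For each input slot, looks up the range already recorded for the layer feeding that slot
// and copies it onto this layer's output slot of the same index. Used for the non-calibrated
// (pass-through) layers, whose output range simply mirrors that of their parents.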
void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer)
{
    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
    {
        const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
        LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
        unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
        const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
        SetRange(layer, i, parentRange.first, parentRange.second);
    }
}

void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersToCalibrate.push_back(layer);
}

void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersNotToCalibrate.push_back(layer);
}
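
// Inserts a DebugLayer after every layer marked for calibration, so that the tensors those
// layers produce can be observed during a calibration run and used to refine their ranges.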
void DynamicQuantizationStrategy::FinishStrategy()
{
    for (const IConnectableLayer* layer : m_LayersToCalibrate)
    {
        std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
            m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
        // Record them so we can take them out again efficiently afterward
        m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
    }
}
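
// Removes every previously inserted DebugLayer: the producer feeding the DebugLayer is
// reconnected directly to the DebugLayer's consumers before the layer is erased from the graph.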
void DynamicQuantizationStrategy::RemoveDebugLayers()
{
    for (DebugLayer* debugLayer : m_DebugLayers)
    {
        OutputSlot& precedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
        precedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));

        for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
        {
            debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
            precedingOutputSlot.Connect(*succeedingInputSlot);
        }
        m_Graph.EraseLayer(debugLayer);
    }
    m_DebugLayers.clear();
}
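
// Final pass: drops the DebugLayers and forwards the parents' ranges onto every layer that
// was not calibrated directly.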
void DynamicQuantizationStrategy::VisitNonCalibratedLayers()
{
    RemoveDebugLayers();
    for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
    {
        ForwardParentParameters(layer);
    }
}
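
// Called once per layer during graph traversal. Seeds an initial output range according to
// the layer type: fixed heuristic ranges for activations and arithmetic layers, data-derived
// ranges for constants, merged parent ranges for Concat, and a placeholder range for inputs.
// Layers are also sorted into the calibrated / non-calibrated sets used by the passes above.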
void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
                                                  const BaseDescriptor& descriptor,
                                                  const std::vector<armnn::ConstTensor>& constants,
                                                  const char* name,
                                                  const armnn::LayerBindingId id)
{
    IgnoreUnused(name);
    IgnoreUnused(id);
    IgnoreUnused(descriptor);

    switch (layer->GetType())
    {
        case armnn::LayerType::Activation :
        {
            const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
            switch (activationDescriptor.m_Function)
            {
                // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
                case ActivationFunction::Abs:
                case ActivationFunction::Linear:
                case ActivationFunction::ReLu:
                case ActivationFunction::SoftReLu:
                    SetRange(layer, 0, 0.f, 15.f);
                    break;
                case ActivationFunction::BoundedReLu:
                    SetRange(layer, 0, 0.f, activationDescriptor.m_A);
                    break;
                case ActivationFunction::TanH:
                    SetRange(layer, 0, -1.f, 1.f);
                    break;
                case ActivationFunction::LeakyReLu:
                    SetRange(layer, 0, -5.f, 15.f);
                    break;
                default:
                    SetRange(layer, 0, -15.f, 15.f);
                    break;
            }
            break;
        }
        case armnn::LayerType::Addition :
        {
            SetRange(layer, 0, -20.f, 20.f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::ArgMinMax :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::BatchNormalization :
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Normalization:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Convolution2d:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::DepthwiseConvolution2d:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::FullyConnected :
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Permute :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::SpaceToBatchNd :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Pooling2d :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Softmax :
        {
            SetRange(layer, 0, 0.f, 1.f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Constant :
        {
            if (constants[0].GetDataType() != DataType::Float32)
            {
                throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
            }
            // Work out the range based on the input constants
            unsigned int inputNumElements = constants[0].GetNumElements();
            const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea());
            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();
            for (unsigned int i = 0; i < inputNumElements; i++)
            {
                const float inputValue = inputData[i];
                min = std::min(min, inputValue);
                max = std::max(max, inputValue);
            }
            SetRange(layer, 0, min, max);
            break;
        }
        case armnn::LayerType::Concat :
        {
            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
                LayerGuid layerId = outputSlot->GetOwningLayerGuid();
                unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
                RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
                min = std::min(min, range.first);
                max = std::max(max, range.second);
            }
            SetRange(layer, 0, min, max);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Reshape :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Splitter :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Resize :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::StridedSlice :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::BatchToSpaceNd :
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Input :
        {
            SetRange(layer, 0, -0.0f, 0.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Output :
        {
            AddToNonCalibratedLayers(layer);
            m_OutputLayers.push_back(id);
            break;
        }
        default:
        {}
    }
}
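
// Returns the binding ids of the Output layers encountered during the traversal.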
const std::vector<LayerBindingId>& DynamicQuantizationStrategy::GetOutputLayers()
{
    return m_OutputLayers;
}
} // namespace armnn