Blame - src/armnn/DynamicQuantizationStrategy.cpp - ml/armnn

blob: d354a0e4410d6826f53662f54c3b74e8bd97b087 [file] [log] [blame]

Finn Williams	b454c5c	2021-02-09 15:56:23 +0000	[diff] [blame]	1	//
				2	// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include "DynamicQuantizationStrategy.hpp"
				7	#include "NetworkUtils.hpp"
				8
				9	#include <armnn/Descriptors.hpp>
				10	#include <armnn/utility/IgnoreUnused.hpp>
				11	#include <armnn/utility/PolymorphicDowncast.hpp>
				12	#include <armnn/Types.hpp>
				13
				14	#include <limits>
				15
				16	namespace armnn
				17	{
				18	DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph)
				19	: m_RangeTracker(rangeTracker),
				20	m_Graph(graph)
				21	{}
				22
				23	void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
				24	{
				25	m_RangeTracker.SetRange(layer, outputIdx, min, max);
				26	}
				27
				28	void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer)
				29	{
				30	for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
				31	{
				32	const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
				33	LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
				34	unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
				35	const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
				36	SetRange(layer, i, parentRange.first, parentRange.second);
				37	}
				38	}
				39
				40	void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer)
				41	{
				42	m_LayersToCalibrate.push_back(layer);
				43	}
				44
				45	void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer)
				46	{
				47	m_LayersNotToCalibrate.push_back(layer);
				48	}
				49
				50	void DynamicQuantizationStrategy::FinishStrategy()
				51	{
				52	for (const IConnectableLayer* layer : m_LayersToCalibrate)
				53	{
				54	std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
				55	m_Graph, PolymorphicDowncast<Layer>(const_cast<IConnectableLayer*>(layer)));
				56	// record them so we can take them out again efficiently afterward
				57	m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
				58	}
				59	}
				60
				61	void DynamicQuantizationStrategy::RemoveDebugLayers()
				62	{
				63	for (DebugLayer* debugLayer : m_DebugLayers)
				64	{
				65	OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
				66	proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
				67
				68	for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
				69	{
				70	debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
				71	proceedingOutputSlot.Connect(*succeedingInputSlot);
				72	}
				73	m_Graph.EraseLayer(debugLayer);
				74	}
				75	m_DebugLayers.clear();
				76	}
				77
				78	void DynamicQuantizationStrategy::VisitNonCalibratedLayers() {
				79	RemoveDebugLayers();
				80	for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
				81	{
				82	ForwardParentParameters(layer);
				83	}
				84	}
				85
				86
				87	void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
				88	const BaseDescriptor& descriptor,
				89	const std::vector<armnn::ConstTensor>& constants,
				90	const char* name,
				91	const armnn::LayerBindingId id)
				92	{
				93	IgnoreUnused(name);
				94	IgnoreUnused(id);
				95	IgnoreUnused(descriptor);
				96
				97	switch (layer->GetType())
				98	{
				99	case armnn::LayerType::Activation :
				100	{
				101	const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
				102	switch (activationDescriptor.m_Function)
				103	{
				104	// Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
				105	case ActivationFunction::Abs:
				106	case ActivationFunction::Linear:
				107	case ActivationFunction::ReLu:
				108	case ActivationFunction::SoftReLu:
				109	SetRange(layer, 0, 0.f, 15.f);
				110	break;
				111	case ActivationFunction::BoundedReLu:
				112	SetRange(layer, 0, 0.f, activationDescriptor.m_A);
				113	break;
				114	case ActivationFunction::TanH:
				115	SetRange(layer, 0, -1.f, 1.f);
				116	break;
				117	case ActivationFunction::LeakyReLu:
				118	SetRange(layer, 0, -5.f, 15.f);
				119	break;
				120	default:
				121	SetRange(layer, 0, -15.f, 15.f);
				122	break;
				123	}
				124	break;
				125	}
				126	case armnn::LayerType::Addition :
				127	{
				128	SetRange(layer, 0, -20.f, 20.f);
				129	AddToCalibratedLayers(layer);
				130	break;
				131	}
				132	case armnn::LayerType::ArgMinMax :
				133	{
				134	AddToNonCalibratedLayers(layer);
				135	break;
				136	}
				137	case armnn::LayerType::BatchNormalization :
				138	{
				139	SetRange(layer, 0, -15.0f, 15.0f);
				140	AddToCalibratedLayers(layer);
				141	break;
				142	}
				143	case armnn::LayerType::Normalization:
				144	{
				145	SetRange(layer, 0, -15.0f, 15.0f);
				146	AddToCalibratedLayers(layer);
				147	break;
				148	}
				149	case armnn::LayerType::Convolution2d:
				150	{
				151	SetRange(layer, 0, -15.0f, 15.0f);
				152	AddToCalibratedLayers(layer);
				153	break;
				154	}
				155	case armnn::LayerType::DepthwiseConvolution2d:
				156	{
				157	SetRange(layer, 0, -15.0f, 15.0f);
				158	AddToCalibratedLayers(layer);
				159	break;
				160	}
				161	case armnn::LayerType::FullyConnected :
				162	{
				163	SetRange(layer, 0, -15.0f, 15.0f);
				164	AddToCalibratedLayers(layer);
				165	break;
				166	}
				167	case armnn::LayerType::Permute :
				168	{
				169	AddToNonCalibratedLayers(layer);
				170	break;
				171	}
				172	case armnn::LayerType::SpaceToBatchNd :
				173	{
				174	AddToNonCalibratedLayers(layer);
				175	break;
				176	}
				177	case armnn::LayerType::Pooling2d :
				178	{
				179	AddToNonCalibratedLayers(layer);
				180	break;
				181	}
				182	case armnn::LayerType::Softmax :
				183	{
				184	SetRange(layer, 0, 0.f, 1.f);
				185	AddToCalibratedLayers(layer);
				186	break;
				187	}
				188	case armnn::LayerType::Constant :
				189	{
				190	if (constants[0].GetDataType() != DataType::Float32)
				191	{
				192	throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
				193	}
				194
				195	// Work out the range based on the input constants
				196	unsigned int inputNumElements = constants[0].GetNumElements();
				197	const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea());
				198
				199	float min = std::numeric_limits<float>::max();
				200	float max = std::numeric_limits<float>::lowest();
				201
				202	for (unsigned int i = 0; i < inputNumElements; i++)
				203	{
				204	const float inputValue = inputData[i];
				205
				206	min = std::min(min, inputValue);
				207	max = std::max(max, inputValue);
				208	}
				209	SetRange(layer, 0, min, max);
				210	break;
				211	}
				212	case armnn::LayerType::Concat :
				213	{
				214	float min = std::numeric_limits<float>::max();
				215	float max = std::numeric_limits<float>::lowest();
				216	for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
				217	{
				218	const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
				219	LayerGuid layerId = outputSlot->GetOwningLayerGuid();
				220	unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
				221	RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
				222	min = std::min(min, range.first);
				223	max = std::max(max, range.second);
				224	}
				225	SetRange(layer, 0, min, max);
				226	AddToCalibratedLayers(layer);
				227	break;
				228	}
				229	case armnn::LayerType::Reshape :
				230	{
				231	AddToNonCalibratedLayers(layer);
				232	break;
				233	}
				234	case armnn::LayerType::Splitter :
				235	{
				236	AddToNonCalibratedLayers(layer);
				237	break;
				238	}
				239	case armnn::LayerType::Resize :
				240	{
				241	AddToNonCalibratedLayers(layer);
				242	break;
				243	}
				244	case armnn::LayerType::StridedSlice :
				245	{
				246	AddToNonCalibratedLayers(layer);
				247	break;
				248	}
				249	case armnn::LayerType::BatchToSpaceNd :
				250	{
				251	AddToNonCalibratedLayers(layer);
				252	break;
				253	}
				254	case armnn::LayerType::Input :
				255	{
				256	SetRange(layer, 0, -0.0f, 0.0f);
				257	AddToCalibratedLayers(layer);
				258	break;
				259	}
				260	case armnn::LayerType::Output :
				261	{
				262	AddToNonCalibratedLayers(layer);
				263	m_OutputLayers.push_back(id);
				264	break;
				265	}
				266	default:
				267	{}
				268	}
				269	}
				270
				271	const std::vector<LayerBindingId>& DynamicQuantizationStrategy::GetOutputLayers()
				272	{
				273	return m_OutputLayers;
				274	}
				275
				276	} //namespace armnn