//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "NetworkQuantizer.hpp"
#include "NetworkQuantizerUtils.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "Network.hpp"
#include "DynamicQuantizationStrategy.hpp"
#include "StaticRangeStrategy.hpp"
#include "QuantizerStrategy.hpp"
#include "OverrideInputRangeVisitor.hpp"
#include <TensorIOUtils.hpp>
#include <armnn/ILayerVisitor.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnnUtils/TensorUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <mapbox/variant.hpp>
#include <vector>
#include <cmath>
namespace armnn
{
using TContainer =
mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
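// Factory helpers for INetworkQuantizer: CreateRaw returns a pointer owned by the caller,
// while Create wraps it in an INetworkQuantizerPtr whose deleter is Destroy below.
//
// Typical usage (a sketch only; assumes 'network' is an INetworkPtr and 'calibrationInputs'
// are InputTensors prepared elsewhere):
//   auto quantizer = INetworkQuantizer::Create(network.get(), QuantizerOptions());
//   quantizer->Refine(calibrationInputs);        // optional, once per calibration batch
//   INetworkPtr quantizedNetwork = quantizer->ExportNetwork();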
INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
{
return new NetworkQuantizer(inputNetwork, options);
}
INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
{
return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
}
void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
{
delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
}
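// Override the quantization range tracked for the input layer with the given binding id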
void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
{
const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph();
auto inputLayers = graph.GetInputLayers();
// Walk the input layers of the graph and override the quantization parameters of the one with the given id
OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
VisitLayers(inputLayers, overrideInputRangeVisitor);
}
void NetworkQuantizer::Refine(const InputTensors& inputTensors)
{
    // The first time Refine is called, m_Runtime and the DynamicQuantizationStrategy
    // will not yet exist. The environment has to be set up: the Runtime created and the
    // network loaded, the DynamicQuantizationStrategy created and run over the network to
    // initialise itself and the RangeTracker, the Debug callback registered, and an
    // initial inference run to record the first min/max values
if (!m_Runtime)
{
m_RefineCount = 0;
m_Ranges.SetDynamicMode(true);
const Graph& cGraph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort();
        // A non-const Graph is needed because the DynamicQuantizationStrategy inserts Debug layers
        Graph& graph = const_cast<Graph&>(cGraph);
        // Initialize the RangeTracker to the default values for each layer.
        // The default values are overwritten by the min/max recorded during
        // the first calibration pass over the dataset. This initialisation is
        // only required for the first call of Refine().
m_DynamicQuantizationStrategy = DynamicQuantizationStrategy(m_Ranges, graph);
ApplyStrategyToLayers(cGraph, m_DynamicQuantizationStrategy.value());
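        // Create a runtime to execute the calibration inferences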
IRuntime::CreationOptions options;
m_Runtime = IRuntime::Create(options);
        // Optimize the network - Debug layers have already been inserted for the layers that require quantization
OptimizerOptions optimizerOptions(false, false);
std::vector<BackendId> backends = {"CpuRef"};
IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
backends,
m_Runtime->GetDeviceSpec(),
optimizerOptions);
m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
// Debug callback function to refine min/max in RangeTracker
auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
// Get min/max pair from tensor data
std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
            // For the first calibration dataset, set the min/max range in the RangeTracker
            // to the min/max values gathered during inference
if (m_RefineCount == 0)
{
m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
}
else
{
                // For every subsequent calibration dataset, only widen the range: update
                // the min if the gathered value is lower, and the max if it is higher.
m_Ranges.RefineMin(guid, slotIndex, minMax.first);
m_Ranges.RefineMax(guid, slotIndex, minMax.second);
}
};
m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
}
    // Create output tensors for EnqueueWorkload
std::vector<armnn::BindingPointInfo> outputBindings;
auto outputLayers = m_DynamicQuantizationStrategy.value().GetOutputLayers();
std::vector<TContainer> outputVectors;
for (auto outputLayerBindingId : outputLayers)
{
auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
}
OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
// Execute EnqueueWorkload with calibration image
m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
++m_RefineCount;
}
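// Produce the quantized network. The quantization ranges come either from the
// StaticRangeStrategy (when Refine was never called) or from the calibration data
// gathered dynamically by Refine.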
INetworkPtr NetworkQuantizer::ExportNetwork()
{
const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort();
    // Step 1) If no Runtime exists (one is only created when Refine has been
    // called), walk the graph and populate default min/max values for the
    // intermediate tensors
if (!m_Runtime)
{
m_Ranges.SetDynamicMode(false);
StaticRangeStrategy rangeStrategy(m_Ranges);
ApplyStrategyToLayers(graph, rangeStrategy);
}
else
{
// Set min/max range of non-calibrated layers to parent layer's range
m_DynamicQuantizationStrategy.value().VisitNonCalibratedLayers();
        // Now tear down the runtime and the dynamic quantization strategy.
m_Runtime.reset(nullptr);
m_DynamicQuantizationStrategy = EmptyOptional();
m_RefineCount = 0;
}
    // Step 2) Convert the input network into a quantized network
std::unique_ptr<IQuantizationScheme> quantizationScheme;
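    // Select the quantization scheme that matches the requested activation data type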
switch (m_Options.m_ActivationFormat)
{
case DataType::QAsymmU8:
quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
break;
case DataType::QAsymmS8:
quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
break;
case DataType::QSymmS8:
quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
break;
case DataType::QSymmS16:
quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
break;
default:
throw InvalidArgumentException("Unsupported quantization target");
}
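    // Walk the graph once more and build the quantized network from the recorded ranges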
QuantizerStrategy quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
ApplyStrategyToLayers(graph, quantizerVisitor);
// clear the ranges
m_Ranges.Reset();
return quantizerVisitor.RetrieveFinalNetwork();
}
} //namespace armnn