| // |
| // Copyright © 2017 Arm Ltd. All rights reserved. |
| // SPDX-License-Identifier: MIT |
| // |
| |
| #include "NetworkQuantizer.hpp" |
| #include "NetworkQuantizerUtils.hpp" |
| #include "Graph.hpp" |
| #include "Layer.hpp" |
| #include "Network.hpp" |
| #include "DynamicQuantizationStrategy.hpp" |
| #include "StaticRangeStrategy.hpp" |
| #include "QuantizerStrategy.hpp" |
| #include "OverrideInputRangeVisitor.hpp" |
| |
| #include <TensorIOUtils.hpp> |
| |
| #include <armnn/ILayerVisitor.hpp> |
| #include <armnn/INetwork.hpp> |
| #include <armnn/Tensor.hpp> |
| #include <armnn/Types.hpp> |
| |
| #include <armnnUtils/TensorUtils.hpp> |
| #include <armnn/utility/PolymorphicDowncast.hpp> |
| |
| #include <mapbox/variant.hpp> |
| |
| #include <vector> |
| #include <cmath> |
| |
| namespace armnn |
| { |
| |
| using TContainer = |
| mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>; |
| |
| INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options) |
| { |
| return new NetworkQuantizer(inputNetwork, options); |
| } |
| |
| INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options) |
| { |
| return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy); |
| } |
| |
| void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer) |
| { |
| delete PolymorphicDowncast<NetworkQuantizer*>(quantizer); |
| } |
| |
| void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max) |
| { |
| const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph(); |
| auto inputLayers = graph.GetInputLayers(); |
| |
| // Walk the input layers of the graph and override the quantization parameters of the one with the given id |
| OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max}); |
| VisitLayers(inputLayers, overrideInputRangeVisitor); |
| } |
| |
| void NetworkQuantizer::Refine(const InputTensors& inputTensors) |
| { |
| // The first time Refine is called the m_Runtime and the DynamicQuantizationStrategy |
| // will not have been created. Need to get the environment set up, Runtime loaded, |
| // DynamicQuantizationStrategy created and run over the network to initialise itself |
| // and the RangeTracker the Debug callback registered and an initial inference |
| // done to set up the first min/max values |
| if (!m_Runtime) |
| { |
| m_RefineCount = 0; |
| m_Ranges.SetDynamicMode(true); |
| const Graph& cGraph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort(); |
| |
| // need to insert Debug layers in the DynamicQuantizationStrategy |
| Graph& graph = const_cast<Graph&>(cGraph); |
| |
| // Initialize RangeTracker to the default values for each layer. |
| // The default values are overwritten by the min/max that is |
| // recorded during the first dataset min/max calibration. This |
| // initialisation is only required for the first call of Refine(). |
| m_DynamicQuantizationStrategy = DynamicQuantizationStrategy(m_Ranges, graph); |
| ApplyStrategyToLayers(cGraph, m_DynamicQuantizationStrategy.value()); |
| |
| IRuntime::CreationOptions options; |
| m_Runtime = IRuntime::Create(options); |
| |
| // Optimize network - debug already enabled for layers that require quantization |
| OptimizerOptions optimizerOptions(false, false); |
| std::vector<BackendId> backends = {"CpuRef"}; |
| IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork, |
| backends, |
| m_Runtime->GetDeviceSpec(), |
| optimizerOptions); |
| |
| m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet)); |
| |
| // Debug callback function to refine min/max in RangeTracker |
| auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) { |
| // Get min/max pair from tensor data |
| std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle); |
| |
| // For first calibration dataset, set min/max range in RangeTracker to |
| // min/max ranges gathered during inference |
| if (m_RefineCount == 0) |
| { |
| m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second); |
| } |
| else |
| { |
| // For every other calibration dataset, only set min/max range if the |
| // values gathered are less than / greater than originally recorded. |
| m_Ranges.RefineMin(guid, slotIndex, minMax.first); |
| m_Ranges.RefineMax(guid, slotIndex, minMax.second); |
| } |
| }; |
| |
| m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback); |
| } |
| |
| // Create output tensor for EnqueueWorkload |
| std::vector<armnn::BindingPointInfo> outputBindings; |
| auto outputLayers = m_DynamicQuantizationStrategy.value().GetOutputLayers(); |
| std::vector<TContainer> outputVectors; |
| for (auto outputLayerBindingId : outputLayers) |
| { |
| auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId); |
| outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo)); |
| outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0)); |
| } |
| OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors); |
| |
| // Execute EnqueueWorkload with calibration image |
| m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); |
| ++m_RefineCount; |
| } |
| |
| INetworkPtr NetworkQuantizer::ExportNetwork() |
| { |
| const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort(); |
| |
| // Step 1) Walk the graph and populate default min/max values for |
| // intermediate tensors, only if Runtime does not exist (created |
| // if Refine has been called) |
| if (!m_Runtime) |
| { |
| m_Ranges.SetDynamicMode(false); |
| StaticRangeStrategy rangeStrategy(m_Ranges); |
| ApplyStrategyToLayers(graph, rangeStrategy); |
| } |
| else |
| { |
| // Set min/max range of non-calibrated layers to parent layer's range |
| m_DynamicQuantizationStrategy.value().VisitNonCalibratedLayers(); |
| // now tear down the runtime and the dynamic visitor. |
| m_Runtime.reset(nullptr); |
| m_DynamicQuantizationStrategy = EmptyOptional(); |
| m_RefineCount = 0; |
| } |
| |
| // Step 2) Convert input InputNetwork to Quantized InputNetwork |
| std::unique_ptr<IQuantizationScheme> quantizationScheme; |
| switch (m_Options.m_ActivationFormat) |
| { |
| case DataType::QAsymmU8: |
| quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>(); |
| break; |
| case DataType::QAsymmS8: |
| quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>(); |
| break; |
| case DataType::QSymmS8: |
| quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>(); |
| break; |
| case DataType::QSymmS16: |
| quantizationScheme = std::make_unique<QSymm16QuantizationScheme>(); |
| break; |
| default: |
| throw InvalidArgumentException("Unsupported quantization target"); |
| } |
| |
| QuantizerStrategy quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType); |
| ApplyStrategyToLayers(graph, quantizerVisitor); |
| |
| // clear the ranges |
| m_Ranges.Reset(); |
| |
| return quantizerVisitor.RetrieveFinalNetwork(); |
| } |
| |
| } // namespace armnn |