//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "DynamicQuantizationStrategy.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/Types.hpp>

#include <limits>
namespace armnn
{
DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph)
    : m_RangeTracker(rangeTracker),
      m_Graph(graph)
{}

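// Records the observed [min, max] range for a given output slot of a layer.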
void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
{
    m_RangeTracker.SetRange(layer, outputIdx, min, max);
}

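// Copies the recorded output range of each input's source layer onto this layer,
// so layers that pass values through unchanged inherit their parents' ranges.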
void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer)
{
    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
    {
        const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
        LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
        unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
        const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
        SetRange(layer, i, parentRange.first, parentRange.second);
    }
}

void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersToCalibrate.push_back(layer);
}

void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersNotToCalibrate.push_back(layer);
}

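// Inserts a DebugLayer after every layer marked for calibration, so the real
// output ranges can be observed when the network is executed.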
void DynamicQuantizationStrategy::FinishStrategy()
{
    for (const IConnectableLayer* layer : m_LayersToCalibrate)
    {
        std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
            m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
        // Record them so we can take them out again efficiently afterwards
        m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
    }
}

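// Splices all previously inserted DebugLayers back out of the graph by
// reconnecting each one's producer slot directly to its consumers.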
void DynamicQuantizationStrategy::RemoveDebugLayers()
{
    for (DebugLayer* debugLayer : m_DebugLayers)
    {
        OutputSlot& precedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
        precedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));

        for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
        {
            debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
            precedingOutputSlot.Connect(*succeedingInputSlot);
        }
        m_Graph.EraseLayer(debugLayer);
    }
    m_DebugLayers.clear();
}

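// Once the DebugLayers are removed, non-calibrated layers simply take over the
// ranges recorded for their parent layers.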
void DynamicQuantizationStrategy::VisitNonCalibratedLayers()
{
    RemoveDebugLayers();
    for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
    {
        ForwardParentParameters(layer);
    }
}

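// Invoked once per layer while traversing the graph: seeds a default output
// range for the layer type and decides whether it needs runtime calibration.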
void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
                                                  const BaseDescriptor& descriptor,
                                                  const std::vector<armnn::ConstTensor>& constants,
                                                  const char* name,
                                                  const armnn::LayerBindingId id)
{
    // name is never used; descriptor and id are referenced in the switch below.
    IgnoreUnused(name);

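    // Heuristic default ranges per layer type; layers added to the calibrated
    // set are refined later from values observed at runtime.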
    switch (layer->GetType())
    {
        case armnn::LayerType::Activation:
        {
            const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
            switch (activationDescriptor.m_Function)
            {
                // Range is [0, 15] for Abs, Linear, ReLu and SoftReLu
                case ActivationFunction::Abs:
                case ActivationFunction::Linear:
                case ActivationFunction::ReLu:
                case ActivationFunction::SoftReLu:
                    SetRange(layer, 0, 0.f, 15.f);
                    break;
                case ActivationFunction::BoundedReLu:
                    SetRange(layer, 0, 0.f, activationDescriptor.m_A);
                    break;
                case ActivationFunction::TanH:
                    SetRange(layer, 0, -1.f, 1.f);
                    break;
                case ActivationFunction::LeakyReLu:
                    SetRange(layer, 0, -5.f, 15.f);
                    break;
                default:
                    SetRange(layer, 0, -15.f, 15.f);
                    break;
            }
            break;
        }
        case armnn::LayerType::Addition:
        {
            SetRange(layer, 0, -20.f, 20.f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::ArgMinMax:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::BatchNormalization:
        case armnn::LayerType::Normalization:
        case armnn::LayerType::Convolution2d:
        case armnn::LayerType::DepthwiseConvolution2d:
        case armnn::LayerType::FullyConnected:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Permute:
        case armnn::LayerType::SpaceToBatchNd:
        case armnn::LayerType::Pooling2d:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Softmax:
        {
            SetRange(layer, 0, 0.f, 1.f);
            AddToCalibratedLayers(layer);
            break;
        }
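        // Constants need no runtime calibration: their exact range is computed
        // directly from the tensor data.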
        case armnn::LayerType::Constant:
        {
            if (constants[0].GetDataType() != DataType::Float32)
            {
                throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
            }

            // Work out the range based on the input constants
            unsigned int inputNumElements = constants[0].GetNumElements();
            const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea());

            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();

            for (unsigned int i = 0; i < inputNumElements; i++)
            {
                const float inputValue = inputData[i];

                min = std::min(min, inputValue);
                max = std::max(max, inputValue);
            }
            SetRange(layer, 0, min, max);
            break;
        }
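        // A concat's output range is the union of the recorded ranges of all its inputs.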
        case armnn::LayerType::Concat:
        {
            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
                LayerGuid layerId = outputSlot->GetOwningLayerGuid();
                unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
                RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
                min = std::min(min, range.first);
                max = std::max(max, range.second);
            }
            SetRange(layer, 0, min, max);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Reshape:
        case armnn::LayerType::Splitter:
        case armnn::LayerType::Resize:
        case armnn::LayerType::StridedSlice:
        case armnn::LayerType::BatchToSpaceNd:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
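        // Inputs start from an empty range; as calibrated layers, their real
        // range is measured when the network runs.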
        case armnn::LayerType::Input:
        {
            SetRange(layer, 0, -0.0f, 0.0f);
            AddToCalibratedLayers(layer);
            break;
        }
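        // Record output binding ids so callers can retrieve them via GetOutputLayers().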
        case armnn::LayerType::Output:
        {
            AddToNonCalibratedLayers(layer);
            m_OutputLayers.push_back(id);
            break;
        }
        default:
        {}
    }
}

const std::vector<LayerBindingId>& DynamicQuantizationStrategy::GetOutputLayers()
{
    return m_OutputLayers;
}

} // namespace armnn