telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2017 Arm Ltd. All rights reserved. |
David Beck | ecb56cd | 2018-09-05 12:52:57 +0100 | [diff] [blame^] | 3 | // SPDX-License-Identifier: MIT |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 4 | // |
| 5 | #include "Graph.hpp" |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 6 | #include "LayersFwd.hpp" |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 7 | |
| 8 | #include <armnn/Utils.hpp> |
| 9 | #include <armnn/TypesUtils.hpp> |
| 10 | |
| 11 | #include <boost/polymorphic_cast.hpp> |
| 12 | #include <boost/log/trivial.hpp> |
| 13 | #include <boost/assert.hpp> |
| 14 | #include <boost/format.hpp> |
| 15 | |
| 16 | #include <unordered_map> |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 17 | #include <DotSerializer.hpp> |
| 18 | #include <sstream> |
| 19 | |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 20 | |
| 21 | namespace armnn |
| 22 | { |
| 23 | |
| 24 | Graph::Graph(const Graph& other) |
| 25 | : m_LayersInOrder(other.m_LayersInOrder) |
| 26 | { |
| 27 | std::unordered_map<const Layer*, Layer*> otherToClonedMap; |
| 28 | |
| 29 | for (auto&& otherLayer : other.m_Layers) |
| 30 | { |
| 31 | Layer* const layer = otherLayer->Clone(*this); |
| 32 | otherToClonedMap.emplace(otherLayer, layer); |
| 33 | } |
| 34 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 35 | // Copies slot connections. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 36 | for (auto&& otherLayer : other.m_Layers) |
| 37 | { |
| 38 | Layer* const thisLayer = otherToClonedMap[otherLayer]; |
| 39 | |
| 40 | auto outputSlot = thisLayer->BeginOutputSlots(); |
| 41 | for (auto&& otherOutputSlot : otherLayer->GetOutputSlots()) |
| 42 | { |
| 43 | for (auto&& otherInputSlot : otherOutputSlot.GetConnections()) |
| 44 | { |
| 45 | const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer(); |
| 46 | Layer* const thisTgtLayer = otherToClonedMap[&otherTgtLayer]; |
| 47 | |
| 48 | InputSlot& inputSlot = thisTgtLayer->GetInputSlot(otherInputSlot->GetSlotIndex()); |
| 49 | outputSlot->Connect(inputSlot); |
| 50 | } |
| 51 | outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo()); |
| 52 | ++outputSlot; |
| 53 | } |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | Status Graph::Print() const |
| 58 | { |
| 59 | if (m_Layers.empty()) |
| 60 | { |
| 61 | BOOST_LOG_TRIVIAL(info) << "\n Graph is empty.\n"; |
| 62 | return Status::Success; |
| 63 | } |
| 64 | BOOST_LOG_TRIVIAL(info) << "\n"; |
| 65 | BOOST_LOG_TRIVIAL(info) << "Walking Pattern: \n"; |
| 66 | |
| 67 | for (auto&& it : TopologicalSort()) |
| 68 | { |
| 69 | BOOST_LOG_TRIVIAL(info) << it->GetName() << ":" << GetLayerTypeAsCString(it->GetType()) |
| 70 | << ":" << GetComputeDeviceAsCString(it->GetComputeDevice()); |
| 71 | } |
| 72 | BOOST_LOG_TRIVIAL(info) << "\n\n"; |
| 73 | |
| 74 | return Status::Success; |
| 75 | } |
| 76 | |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 77 | Status Graph::SerializeToDot(std::ostream& stream) |
| 78 | { |
| 79 | { |
| 80 | DotGraph graph(stream, "Optimized"); |
| 81 | |
| 82 | { |
| 83 | // Default node attributes: |
| 84 | DotDefaults nodes(stream, "node"); |
| 85 | nodes.GetAttributeSet() |
| 86 | .AddAttribute("shape", "record"); |
| 87 | } |
| 88 | |
| 89 | { |
| 90 | // Default edge attributes: |
| 91 | DotDefaults edges(stream, "edge"); |
| 92 | edges.GetAttributeSet() |
| 93 | .AddAttribute("fontsize", 8) |
| 94 | .AddAttribute("fontcolor", "blue") |
| 95 | .AddAttribute("fontname", "arial-bold"); |
| 96 | } |
| 97 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 98 | // First declares the nodes. |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 99 | for (auto&& layer : m_Layers) |
| 100 | { |
| 101 | DotNode node(stream, layer->GetGuid(), GetLayerTypeAsCString(layer->GetType())); |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 102 | // Extracts the layer parameters. |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 103 | ParameterStringifyFunction extractParams = [&node](const std::string & name, const std::string & value){ |
| 104 | node.GetContents().AddContent(name + " : " + value); |
| 105 | }; |
| 106 | layer->SerializeLayerParameters(extractParams); |
| 107 | } |
| 108 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 109 | // Second declares the edges. |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 110 | for (auto&& layer : m_Layers) |
| 111 | { |
| 112 | LayerGuid toId = layer->GetGuid(); |
| 113 | |
| 114 | for (unsigned int i=0;i<layer->GetNumInputSlots(); i++) |
| 115 | { |
| 116 | OutputSlot* outputSlot = static_cast<OutputSlot*>(layer->GetInputSlot(i).GetConnection()); |
| 117 | LayerGuid fromId = outputSlot->GetOwningLayer().GetGuid(); |
| 118 | DotEdge edge(stream, fromId, toId); |
| 119 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 120 | // Now print the tensor shape on the edge. |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 121 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 122 | // Constructs the label attribute with HTML markup. |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 123 | std::stringstream ss; |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 124 | ss << "< " << outputSlot->GetTensorInfo().GetShape() << " >"; |
surmeh01 | bceff2f | 2018-03-29 16:29:27 +0100 | [diff] [blame] | 125 | edge.GetAttributeSet().AddAttribute("label", ss); |
| 126 | } |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | if (stream.bad()) |
| 132 | { |
| 133 | return Status::Failure; |
| 134 | } |
| 135 | return Status::Success; |
| 136 | } |
| 137 | |
/// Allocates the backing buffers of all tensor handles in the graph.
/// Constant tensors are allocated upfront (they must live for the whole
/// execution); all other tensors have their lifetimes tracked via reference
/// counting so the memory manager can overlap non-conflicting buffers.
/// @return Status::Success (no failure path in the current implementation).
Status Graph::AllocateDynamicBuffers()
{
    // Layers must be sorted in topological order
    BOOST_ASSERT(m_LayersInOrder);

    // Tensor handles allocated upfront (constants); excluded from ref-counting below.
    std::unordered_set<const ITensorHandle*> preallocatedTensors;
    // Outstanding consumer count per tensor handle; when it hits zero the
    // handle's lifetime ends and it is allocated.
    std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;

    // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
    // is a TensorHandle, the function just returns it
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    // Checks whether a TensorHandle has been pre-allocated
    auto IsPreallocated = [&](ITensorHandle* const tensorHandle)
    {
        return tensorHandle && preallocatedTensors.find(tensorHandle) != preallocatedTensors.end();
    };

    // Constant tensor handles need to last from the beginning of execution till the end,
    // therefore we pre-allocate them upfront
    for (auto&& layer : m_Layers)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
            {
                ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());

                if (tensorHandle && !IsPreallocated(tensorHandle))
                {
                    tensorHandle->Allocate();
                    preallocatedTensors.insert(tensorHandle);
                }
            }
        }
    }

    // Iterate over the network in topological order
    for (auto&& layer : m_Layers)
    {
        // Count the amount of times each output slot references a certain buffer (ITensorHandle).
        // The first time we encounter a new tensor handle, we start managing its lifetime.
        for (auto&& slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
        {
            ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData());

            if (tensorHandle && !IsPreallocated(tensorHandle))
            {
                unsigned int numConnections = slot->GetNumConnections();
                if (handleReferenceCounts.find(tensorHandle) == handleReferenceCounts.end())
                {
                    // First sighting of this handle: begin its managed lifetime.
                    // NOTE(review): if numConnections is 0 here (dangling output),
                    // the handle is Manage()d but never Allocate()d below — confirm
                    // whether dangling outputs can occur at this stage.
                    handleReferenceCounts[tensorHandle] = numConnections;
                    tensorHandle->Manage();
                }
                else
                {
                    // Several sub-tensors can trace back to the same ancestor handle;
                    // accumulate their consumer counts.
                    handleReferenceCounts[tensorHandle] += numConnections;
                }
            }
        }

        // Loop through the input slots in the same layer and decrement the reference counter associated
        // to each tensor handle we encounter. Once it reaches zero, we end the lifetime of the tensor handle
        for (auto&& slot = layer->BeginInputSlots(); slot != layer->EndInputSlots(); ++slot)
        {
            ITensorHandle *tensorHandle = TraceSubTensorHandleAncestry(
                slot->GetConnectedOutputSlot()->GetOutputHandler().GetData());

            if (tensorHandle && !IsPreallocated(tensorHandle))
            {
                --handleReferenceCounts[tensorHandle];

                if (handleReferenceCounts[tensorHandle] == 0u)
                {
                    // Stop managing lifetime of tensor handle
                    tensorHandle->Allocate();
                    handleReferenceCounts.erase(tensorHandle);
                }
            }
        }
    }

    return Status::Success;
}
| 230 | |
| 231 | const Graph& Graph::TopologicalSort() const |
| 232 | { |
| 233 | if (!m_LayersInOrder) |
| 234 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 235 | // Resets layer order. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 236 | for (auto&& it : m_Layers) |
| 237 | { |
| 238 | it->ResetPriority(); |
| 239 | } |
| 240 | |
| 241 | auto compareLayerPriority = [](const LayersList::value_type& layerA, const LayersList::value_type& layerB) |
| 242 | { |
| 243 | return layerA->GetPriority() < layerB->GetPriority(); |
| 244 | }; |
| 245 | |
| 246 | m_Layers.sort(compareLayerPriority); |
| 247 | |
| 248 | m_LayersInOrder = true; |
| 249 | } |
| 250 | |
| 251 | return *this; |
| 252 | } |
| 253 | |
| 254 | void Graph::AddCopyLayers() |
| 255 | { |
| 256 | // Returns true if the given layer could potentially need an intermediate copy layer (depending on its |
| 257 | // connections to other layers). At the time of writing, copy layers will be inserted in the following situations: |
| 258 | // CPU -> CL (and viceversa) |
| 259 | // CPU -> Neon (and viceversa) |
| 260 | auto MayNeedCopyLayer = [](const Layer& layer) |
| 261 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 262 | // All layers should have been associated with a valid compute device at this point. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 263 | BOOST_ASSERT(layer.GetComputeDevice() != Compute::Undefined); |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 264 | // Does not need another copy layer if a copy layer is already present. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 265 | return layer.GetType() != LayerType::MemCopy; |
| 266 | }; |
| 267 | |
| 268 | for (auto&& srcLayer : m_Layers) |
| 269 | { |
| 270 | if (MayNeedCopyLayer(*srcLayer)) |
| 271 | { |
| 272 | unsigned int srcOutputIndex = 0; |
| 273 | for (auto&& srcOutput : srcLayer->GetOutputSlots()) |
| 274 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 275 | std::vector<InputSlot*> connectionCopy = srcOutput.GetConnections(); |
| 276 | for (auto&& dstInput : connectionCopy) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 277 | { |
| 278 | Layer& dstLayer = dstInput->GetOwningLayer(); |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 279 | if (MayNeedCopyLayer(dstLayer) && (dstLayer.GetComputeDevice() != srcLayer->GetComputeDevice())) |
| 280 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 281 | // A copy layer is needed in between the source and destination layers. |
| 282 | // Record the operation rather than attempting to modify the graph as we go. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 283 | // (invalidating iterators) |
| 284 | const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") |
| 285 | % srcLayer->GetName() |
| 286 | % srcOutputIndex |
| 287 | % dstLayer.GetName() |
| 288 | % dstInput->GetSlotIndex()); |
| 289 | |
| 290 | MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str()); |
| 291 | copyLayer->SetComputeDevice(dstLayer.GetComputeDevice()); |
| 292 | } |
| 293 | } |
| 294 | ++srcOutputIndex; |
| 295 | } |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | void Graph::InferTensorInfos() |
| 301 | { |
| 302 | for (auto&& layer : TopologicalSort()) |
| 303 | { |
| 304 | for (auto&& input : layer->GetInputSlots()) |
| 305 | { |
| 306 | boost::ignore_unused(input); |
| 307 | BOOST_ASSERT_MSG(input.GetConnectedOutputSlot()->IsTensorInfoSet(), |
| 308 | "All inputs must have the TensorInfo set at this point."); |
| 309 | } |
| 310 | layer->ValidateTensorShapesFromInputs(); |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | } // namespace armnn |