//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "CaffeParser.hpp"
#include "RecordByRecordCaffeParser.hpp"

#include "armnn/Descriptors.hpp"
#include "armnn/INetwork.hpp"
#include "armnn/Utils.hpp"
#include "armnn/Exceptions.hpp"

#include "GraphTopologicalSort.hpp"
#include "VerificationHelpers.hpp"

#include <boost/numeric/conversion/cast.hpp>
#include <boost/assert.hpp>
#include <boost/format.hpp>
#include <boost/log/trivial.hpp>

// Caffe
#include "caffe/proto/caffe.pb.h"

// ProtoBuf
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/once.h>
#include <google/protobuf/wire_format_lite_inl.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/generated_message_reflection.h>
#include <google/protobuf/reflection_ops.h>
#include <google/protobuf/wire_format.h>

#include <cmath>
#include <sstream>
#include <queue>
#include <fcntl.h>

/// Caffe networks are loaded from protobuf files (binary or text) using the protobuf library and the generated
/// code from caffe.pb.h. This gives us a caffe::NetParameter, which is an in-memory version of the file.
/// This contains a flat list of Caffe 'layers' (e.g. convolution, pooling etc.).
/// Each layer has inputs (called "bottoms") and outputs (called "tops"). Data flows from bottom to top.
/// A layer's bottoms refer to the tops of other layers (by top name), not to the layer names.
/// Layer names appear to be arbitrary (you could rename a layer and the network wouldn't
/// need any other changes).
///
/// Some layers (e.g. Relu) can be configured so that their top and bottom are both the same. This is called an
/// "in-place" layer and is a Caffe runtime feature used to reduce memory usage by modifying tensors in-place.
/// This isn't relevant to the parser, so we preprocess these layers to convert them to regular layers, giving
/// a consistent graph structure.
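///
/// As an illustrative sketch (layer and blob names here are made up), a typical
/// in-place ReLU in a .prototxt looks like:
///
///     layer {
///       name: "relu1"
///       type: "ReLU"
///       bottom: "conv1"   # reads the top called "conv1"
///       top: "conv1"      # writes back to the same blob, i.e. in-place
///     }
///
/// The preprocessing mentioned above rewrites such layers so that every top has a
/// unique name and all connections can be resolved unambiguously.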

namespace armnnCaffeParser
{

using namespace armnn;
using namespace caffe;
using namespace std;
using namespace google::protobuf::io;

namespace
{

const float* GetArrayPtrFromBlob(const LayerParameter& layerParam, unsigned int blobIndex)
{
    auto nBlobs = layerParam.blobs_size();
    if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Expected data blob at index %1% in layer %2% not found. nBlobs=%3%. %4%") %
                    blobIndex %
                    layerParam.name() %
                    nBlobs %
                    CHECK_LOCATION().AsString()));
    }

    const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));

    const float* arrayPtr = blob.data().data();
    return arrayPtr;
}

void GetDataFromBlob(const LayerParameter& layerParam, vector<float>& outData, unsigned int blobIndex)
{
    auto nBlobs = layerParam.blobs_size();
    if (blobIndex >= boost::numeric_cast<unsigned int>(nBlobs))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Expected data blob at index %1% in layer %2% not found. %3%") %
                    blobIndex %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }

    const BlobProto& blob = layerParam.blobs(boost::numeric_cast<int>(blobIndex));

    size_t blobSize = boost::numeric_cast<size_t>(blob.data_size());
    if (blobSize != outData.size())
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Data blob at index %1% in layer %2% has an unexpected size. "
                    "Expected %3% elements but got %4% elements. %5%") %
                    blobIndex %
                    layerParam.name() %
                    outData.size() %
                    blobSize %
                    CHECK_LOCATION().AsString()));
    }

    int outSizeInt = boost::numeric_cast<int>(outData.size());
    for (int i = 0; i < outSizeInt; ++i)
    {
        outData[static_cast<size_t>(i)] = blob.data(i);
    }
}

bool IsInRange(unsigned int value, unsigned int min, unsigned int max)
{
    return (value >= min && value <= max);
}

template <typename T>
size_t SizeOfVectorData(const vector<T>& vec)
{
    return vec.size() * sizeof(T);
}

void ValidateNumInputsOutputs(const caffe::LayerParameter& layerParameter,
                              unsigned int numInputs,
                              unsigned int numOutputs)
{
    int numInputsActual = layerParameter.bottom_size();
    if (numInputs != boost::numeric_cast<unsigned int>(numInputsActual))
    {
        throw ParseException(
            boost::str(
                boost::format("Invalid number of inputs requested %1% for layer %2% "
                              "while %3% are present. %4%") %
                numInputs %
                layerParameter.name() %
                numInputsActual %
                CHECK_LOCATION().AsString()));
    }

    int numOutputsActual = layerParameter.top_size();
    if (numOutputs != boost::numeric_cast<unsigned int>(numOutputsActual))
    {
        throw ParseException(
            boost::str(
                boost::format("Invalid number of outputs requested %1% for layer %2% "
                              "while %3% are present. %4%") %
                numOutputs %
                layerParameter.name() %
                numOutputsActual %
                CHECK_LOCATION().AsString()));
    }
}

template <typename ParamType, typename ExtractOptional, typename ExtractFallback, typename ValueType>
ValueType GetOptionalWithFallback(const ParamType& param,
                                  ExtractOptional extractOptional,
                                  ExtractFallback extractFallback,
                                  ValueType defaultValue)
{
    auto optValue = extractOptional(param, defaultValue);
    if (optValue.first)
    {
        return optValue.second;
    }
    auto fallbackValue = extractFallback(param, defaultValue);
    return fallbackValue.second;
}

#define GET_OPTIONAL_WITH_VECTOR_FALLBACK(PARAM, \
                                          PARAM_TYPE, \
                                          OPTIONAL_VALUE, \
                                          FALLBACK_VECTOR, \
                                          VALUE_TYPE, \
                                          DEFAULT_VALUE) \
    GetOptionalWithFallback( \
        PARAM, \
        [](const PARAM_TYPE& param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##OPTIONAL_VALUE()) \
            { \
                return std::make_pair(true, param.OPTIONAL_VALUE()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        [](const PARAM_TYPE& param, VALUE_TYPE defaultValue) \
        { \
            if (param.FALLBACK_VECTOR##_size() > 0) \
            { \
                return std::make_pair(true, (param.FALLBACK_VECTOR()).Get(0)); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        DEFAULT_VALUE)

#define GET_OPTIONAL_WITH_FALLBACK(PARAM, \
                                   PARAM_TYPE, \
                                   OPTIONAL_VALUE, \
                                   FALLBACK_VALUE, \
                                   VALUE_TYPE, \
                                   DEFAULT_VALUE) \
    GetOptionalWithFallback( \
        PARAM, \
        [](const PARAM_TYPE& param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##OPTIONAL_VALUE()) \
            { \
                return std::make_pair(true, param.OPTIONAL_VALUE()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        [](const PARAM_TYPE& param, VALUE_TYPE defaultValue) \
        { \
            if (param.has_##FALLBACK_VALUE()) \
            { \
                return std::make_pair(true, param.FALLBACK_VALUE()); \
            } \
            else \
            { \
                return std::make_pair(false, defaultValue); \
            } \
        }, \
        DEFAULT_VALUE)
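
// Usage sketch (illustrative only): prefer a scalar optional field if it is set,
// otherwise fall back to the first element of a repeated field, otherwise use the
// supplied default. For example, for a Caffe ConvolutionParameter:
//
//     unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(
//         convParam, ConvolutionParameter, kernel_h, kernel_size, unsigned int, 0u);
//
// expands to a GetOptionalWithFallback() call that checks has_kernel_h() first and
// kernel_size_size() > 0 second.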

void ValidateEqualValuesInRange(unsigned int valueA,
                                const char* valueNameA,
                                unsigned int valueB,
                                const char* valueNameB,
                                unsigned int min,
                                unsigned int max,
                                const armnn::CheckLocation& location)
{
    if (!IsInRange(valueA, min, max) || !IsInRange(valueB, min, max) || (valueA != valueB))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "%1%=%2% and %3%=%4% must be equal and within the valid range "
                    "of [%5%, %6%] %7%") %
                    valueNameA %
                    valueA %
                    valueNameB %
                    valueB %
                    min %
                    max %
                    location.AsString()));
    }
}

#define VALIDATE_EQUAL_VALUES_IN_RANGE(A, B, MIN_RANGE, MAX_RANGE) \
    ValidateEqualValuesInRange(A, #A, B, #B, MIN_RANGE, MAX_RANGE, CHECK_LOCATION())

} // namespace <anonymous>

const std::map<std::string, CaffeParserBase::OperationParsingFunction>
    CaffeParserBase::ms_CaffeLayerNameToParsingFunctions = {
    { "Input",        &CaffeParserBase::ParseInputLayer },
    { "Convolution",  &CaffeParserBase::ParseConvLayer },
    { "Pooling",      &CaffeParserBase::ParsePoolingLayer },
    { "ReLU",         &CaffeParserBase::ParseReluLayer },
    { "LRN",          &CaffeParserBase::ParseLRNLayer },
    { "InnerProduct", &CaffeParserBase::ParseInnerProductLayer },
    { "Softmax",      &CaffeParserBase::ParseSoftmaxLayer },
    { "Eltwise",      &CaffeParserBase::ParseEltwiseLayer },
    { "Concat",       &CaffeParserBase::ParseConcatLayer },
    { "BatchNorm",    &CaffeParserBase::ParseBatchNormLayer },
    { "Scale",        &CaffeParserBase::ParseScaleLayer },
    { "Split",        &CaffeParserBase::ParseSplitLayer },
    { "Dropout",      &CaffeParserBase::ParseDropoutLayer },
};
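
// Dispatch sketch (illustrative, not the verbatim loading code, which lives outside
// this section): for each Caffe layer, the parser looks up layer.type() in the map
// above and invokes the corresponding member function, roughly:
//
//     auto it = ms_CaffeLayerNameToParsingFunctions.find(layerParam.type());
//     if (it != ms_CaffeLayerNameToParsingFunctions.end())
//     {
//         (this->*(it->second))(layerParam);
//     }
//     // otherwise: unsupported layer type -> ParseException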

ICaffeParser* ICaffeParser::CreateRaw()
{
    return new RecordByRecordCaffeParser();
}

ICaffeParserPtr ICaffeParser::Create()
{
    return ICaffeParserPtr(CreateRaw(), &ICaffeParser::Destroy);
}

void ICaffeParser::Destroy(ICaffeParser* parser)
{
    delete parser;
}

CaffeParserBase::CaffeParserBase()
    : m_Network(nullptr, nullptr)
{
}

CaffeParser::CaffeParser()
    : CaffeParserBase()
{
}

BindingPointInfo CaffeParserBase::GetNetworkInputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
}

BindingPointInfo CaffeParserBase::GetNetworkOutputBindingInfo(const std::string& name) const
{
    return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
}

std::pair<armnn::LayerBindingId, armnn::TensorInfo> CaffeParserBase::GetBindingInfo(const std::string& layerName,
    const char* bindingPointDesc,
    const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
{
    auto it = nameToBindingInfo.find(layerName);
    if (it == nameToBindingInfo.end())
    {
        throw InvalidArgumentException(
            boost::str(
                boost::format(
                    "Unknown binding %1% for layer '%2%'. %3%") %
                    bindingPointDesc %
                    layerName %
                    CHECK_LOCATION().AsString()));
    }
    return it->second;
}

TensorInfo CaffeParserBase::BlobShapeToTensorInfo(const caffe::BlobShape& blobShape) const
{
    std::vector<unsigned int> shape;
    for (int j = 0; j < blobShape.dim_size(); ++j)
    {
        shape.push_back(static_cast<unsigned int>(blobShape.dim(j)));
    }

    return TensorInfo(boost::numeric_cast<unsigned int>(shape.size()), shape.data(), DataType::Float32);
}

BlobShape TensorDescToBlobShape(const TensorInfo& desc)
{
    BlobShape ret;
    for (unsigned int i = 0; i < desc.GetNumDimensions(); ++i)
    {
        ret.add_dim(i);
        ret.set_dim(boost::numeric_cast<int>(i), desc.GetShape()[i]);
    }

    return ret;
}

// Note: can be moved to CaffeParser when/if we optimise the text/string format
// to load on a layer-by-layer basis.
vector<const LayerParameter*> CaffeParserBase::GetInputs(const LayerParameter& layerParam)
{
    std::vector<const caffe::LayerParameter*> ret;
    ret.reserve(boost::numeric_cast<size_t>(layerParam.bottom_size()));
    for (int j = 0; j < layerParam.bottom_size(); ++j)
    {
        std::string inputName = layerParam.bottom(j);
        auto inputIt = m_CaffeLayersByTopName.find(inputName);
        if (inputIt == m_CaffeLayersByTopName.end())
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Can't find Caffe layer with top called '%1%', "
                        "which is listed as an input of '%2%'. %3%") %
                        inputName %
                        layerParam.name() %
                        CHECK_LOCATION().AsString()));
        }
        ret.push_back(inputIt->second);
    }

    return ret;
}

void CaffeParserBase::ParseInputLayer(const LayerParameter& layerParam)
{
    BOOST_ASSERT(layerParam.type() == "Input");
    ValidateNumInputsOutputs(layerParam, 0, 1);

    const InputParameter& param = layerParam.input_param();

    const armnn::LayerBindingId inputId = boost::numeric_cast<armnn::LayerBindingId>(
        m_NetworkInputsBindingInfo.size());
    armnn::IConnectableLayer* const inputLayer = m_Network->AddInputLayer(inputId, layerParam.name().c_str());

    // Decides the tensor info for this input. This can be specified in the Caffe network but can also
    // be overridden by user input (m_InputShapes).
    armnn::TensorInfo inputTensorInfo;

    const BlobShape* originalShape = param.shape_size() > 0 && param.shape(0).dim_size() > 0 ?
        &param.shape(0) : nullptr;
    if (originalShape)
    {
        inputTensorInfo = BlobShapeToTensorInfo(*originalShape);
    }

    auto overrideIt = m_InputShapes.find(layerParam.name());
    if (overrideIt != m_InputShapes.end())
    {
        const TensorShape& overrideShape = overrideIt->second;
        if (originalShape &&
            (   originalShape->dim(1) != overrideShape[1]
             || originalShape->dim(2) != overrideShape[2]
             || originalShape->dim(3) != overrideShape[3]))
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Parsed input shape for '%1%' is incompatible with the override provided. %2%") %
                        layerParam.name() %
                        CHECK_LOCATION().AsString()));
        }
        inputTensorInfo.SetShape(overrideShape);
    }
    else if (!originalShape)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "No input descriptor given for '%1%' and no input shape found in caffe model. %2%") %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }

    TrackInputBinding(inputLayer, inputId, inputTensorInfo);
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), inputLayer->GetOutputSlot(0));
}

void CaffeParserBase::AddConvLayerWithSplits(const caffe::LayerParameter& layerParam,
                                             const armnn::Convolution2dDescriptor& desc,
                                             unsigned int kernelW,
                                             unsigned int kernelH)
{
    BOOST_ASSERT(layerParam.type() == "Convolution");
    ValidateNumInputsOutputs(layerParam, 1, 1);

    ConvolutionParameter convParam = layerParam.convolution_param();
    BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
    const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;

    // Assume these were already verified by the calling ParseConvLayer() function.
    BOOST_ASSERT(numGroups < inputShape.dim(1));
    BOOST_ASSERT(numGroups > 1);

    // Handle grouping.
    armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));

    vector<string> convLayerNames(numGroups);
    vector<armnn::IConnectableLayer*> convLayers(numGroups);
    convLayerNames[0] = layerParam.name();

    // This convolution is to be applied to chunks of the input data, so add a splitter layer.

    // Redirect the convolution input to the splitter.
    unsigned int splitterDimSizes[4] = {static_cast<unsigned int>(inputShape.dim(0)),
                                        static_cast<unsigned int>(inputShape.dim(1)),
                                        static_cast<unsigned int>(inputShape.dim(2)),
                                        static_cast<unsigned int>(inputShape.dim(3))};

    // Split dimension 1 of the splitter output shape and conv input shapes
    // according to the number of groups.
    splitterDimSizes[1] /= numGroups;
    inputShape.set_dim(1, splitterDimSizes[1]);

    // This is used to describe how the input is to be split.
    ViewsDescriptor splitterDesc(numGroups);

    // Create an output node for each group, giving each a unique name.
    for (unsigned int g = 0; g < numGroups; ++g)
    {
        // Work out the names of the splitter layer's child convolutions.
        stringstream ss;
        ss << layerParam.name() << "_" << g;
        convLayerNames[g] = ss.str();

        splitterDesc.SetViewOriginCoord(g, 1, splitterDimSizes[1] * g);

        // Set the size of the views.
        for (unsigned int dimIdx = 0; dimIdx < 4; dimIdx++)
        {
            splitterDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
        }
    }
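
    // Illustrative sketch of the views set up above: with numGroups = 2 and an NCHW
    // input of shape [N, C, H, W], each view has shape [N, C/2, H, W]; view 0 starts
    // at channel 0 and view 1 at channel C/2, i.e.
    //
    //     splitterDesc.SetViewOriginCoord(g, 1, (C / 2) * g);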
    const std::string splitterLayerName = std::string("splitter_") + layerParam.bottom(0);
    armnn::IConnectableLayer* splitterLayer = m_Network->AddSplitterLayer(splitterDesc, splitterLayerName.c_str());

    inputConnection.Connect(splitterLayer->GetInputSlot(0));
    for (unsigned int i = 0; i < splitterLayer->GetNumOutputSlots(); i++)
    {
        splitterLayer->GetOutputSlot(i).SetTensorInfo(BlobShapeToTensorInfo(inputShape));
    }

    unsigned int numFilters = convParam.num_output();

    // Populates convolution output tensor descriptor dimensions.
    BlobShape outputShape;
    outputShape.add_dim(0);
    outputShape.set_dim(0, inputShape.dim(0));
    outputShape.add_dim(1);
    // Ensures that dimension 1 of the convolution output is split according to the number of groups.
    outputShape.set_dim(1, numFilters / numGroups);
    outputShape.add_dim(2);
    outputShape.set_dim(
        2, (static_cast<int>(
            static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
            static_cast<float>(desc.m_StrideY)) + 1));
    outputShape.add_dim(3);
    outputShape.set_dim(
        3, (static_cast<int>(
            static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
            static_cast<float>(desc.m_StrideX)) + 1));

    // Load the weight data for ALL groups.
    vector<float> weightData(boost::numeric_cast<size_t>(numGroups *
                                                         inputShape.dim(1) *  // number of input channels
                                                         outputShape.dim(1) * // number of output channels
                                                         kernelH *
                                                         kernelW));
    GetDataFromBlob(layerParam, weightData, 0);

    const unsigned int weightDimSizes[4] = {
        static_cast<unsigned int>(outputShape.dim(1)),
        static_cast<unsigned int>(inputShape.dim(1)),
        kernelH,
        kernelW};

    TensorInfo biasInfo;
    vector<float> biasData;

    if (desc.m_BiasEnabled)
    {
        biasData.resize(boost::numeric_cast<size_t>(numGroups * outputShape.dim(1)), 1.f);
        GetDataFromBlob(layerParam, biasData, 1);

        const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
        biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);
    }

    const unsigned int numWeightsPerGroup = boost::numeric_cast<unsigned int>(weightData.size()) / numGroups;
    const unsigned int numBiasesPerGroup  = boost::numeric_cast<unsigned int>(biasData.size()) / numGroups;

    for (unsigned int g = 0; g < numGroups; ++g)
    {
        // Sets the slot index: group 0 should be connected to the 0th output of the splitter,
        // group 1 to the 1st output of the splitter, and so on.

        // Pulls out the weights for this group from those loaded from the model file earlier.
        ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32),
                            weightData.data() + numWeightsPerGroup * g);

        IConnectableLayer* convLayer = nullptr;
        if (desc.m_BiasEnabled)
        {
            // Pulls out the biases for this group from those loaded from the model file earlier.
            ConstTensor biases(biasInfo, biasData.data() + numBiasesPerGroup * g);

            convLayer =
                m_Network->AddConvolution2dLayer(desc, weights, biases, convLayerNames[g].c_str());
        }
        else
        {
            convLayer =
                m_Network->AddConvolution2dLayer(desc, weights, convLayerNames[g].c_str());
        }
        convLayers[g] = convLayer;

        // If we have more than one group then the input to the nth convolution is the splitter
        // layer's nth output, otherwise it's the regular input to this layer.
        armnn::IOutputSlot& splitterInputConnection =
            splitterLayer ? splitterLayer->GetOutputSlot(g) : inputConnection;
        splitterInputConnection.Connect(convLayer->GetInputSlot(0));
        convLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
    }

    // The convolution was performed in chunks, so add a layer to merge the results.

    // The merge input shape matches that of the convolution output.
    unsigned int mergeDimSizes[4] = {static_cast<unsigned int>(outputShape.dim(0)),
                                     static_cast<unsigned int>(outputShape.dim(1)),
                                     static_cast<unsigned int>(outputShape.dim(2)),
                                     static_cast<unsigned int>(outputShape.dim(3))};

    // This is used to describe how the input is to be merged.
    OriginsDescriptor mergeDesc(numGroups);

    // Now create an input node for each group, using the name from
    // the output of the corresponding convolution.
    for (unsigned int g = 0; g < numGroups; ++g)
    {
        mergeDesc.SetViewOriginCoord(g, 1, mergeDimSizes[1] * g);
    }

    // Make sure the output from the merge is the correct size to hold the data for all groups.
    mergeDimSizes[1] *= numGroups;
    outputShape.set_dim(1, mergeDimSizes[1]);

    // Finally add the merge layer.
    IConnectableLayer* mergerLayer = m_Network->AddMergerLayer(mergeDesc, layerParam.name().c_str());

    if (!mergerLayer)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to create final merger layer for Split+Convolution+Merger. "
                    "Layer=%1% #groups=%2% #filters=%3% %4%") %
                    layerParam.name() %
                    numGroups %
                    numFilters %
                    CHECK_LOCATION().AsString()));
    }

    for (unsigned int g = 0; g < numGroups; ++g)
    {
        convLayers[g]->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(g));
    }
    mergerLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(4, mergeDimSizes, DataType::Float32));
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), mergerLayer->GetOutputSlot(0));
}

void CaffeParserBase::AddConvLayerWithDepthwiseConv(const caffe::LayerParameter& layerParam,
                                                    const armnn::Convolution2dDescriptor& convDesc,
                                                    unsigned int kernelW,
                                                    unsigned int kernelH)
{
    BOOST_ASSERT(layerParam.type() == "Convolution");
    ValidateNumInputsOutputs(layerParam, 1, 1);

    ConvolutionParameter convParam = layerParam.convolution_param();
    BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());

    DepthwiseConvolution2dDescriptor desc;
    desc.m_PadLeft     = convDesc.m_PadLeft;
    desc.m_PadRight    = convDesc.m_PadRight;
    desc.m_PadTop      = convDesc.m_PadTop;
    desc.m_PadBottom   = convDesc.m_PadBottom;
    desc.m_StrideX     = convDesc.m_StrideX;
    desc.m_StrideY     = convDesc.m_StrideY;
    desc.m_BiasEnabled = convDesc.m_BiasEnabled;

    unsigned int numFilters = convParam.num_output();

    BlobShape outputShape;
    outputShape.add_dim(0);
    outputShape.set_dim(0, inputShape.dim(0));
    outputShape.add_dim(1);
    outputShape.set_dim(1, numFilters);
    outputShape.add_dim(2);
    outputShape.set_dim(
        2, (static_cast<int>(
            static_cast<float>(inputShape.dim(2) + 2 * desc.m_PadBottom - kernelH) /
            static_cast<float>(desc.m_StrideY)) + 1));
    outputShape.add_dim(3);
    outputShape.set_dim(
        3, (static_cast<int>(
            static_cast<float>(inputShape.dim(3) + 2 * desc.m_PadRight - kernelW) /
            static_cast<float>(desc.m_StrideX)) + 1));

    // Load the weight data.
    size_t allWeightsSize = boost::numeric_cast<size_t>(inputShape.dim(1) * kernelH * kernelW);
    vector<float> weightData(allWeightsSize);

    GetDataFromBlob(layerParam, weightData, 0);

    // The depth multiplier is 1 for this depthwise convolution.
    const unsigned int weightDimSizes[4] = {
        static_cast<unsigned int>(1),                 // depth multiplier
        static_cast<unsigned int>(inputShape.dim(1)), // #channels
        kernelH,
        kernelW};

    armnn::IConnectableLayer* returnLayer = nullptr;
    ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());

    if (desc.m_BiasEnabled)
    {
        TensorInfo biasInfo;
        vector<float> biasData;

        biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
        GetDataFromBlob(layerParam, biasData, 1);

        const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
        biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);

        ConstTensor biases(biasInfo, biasData.data());
        returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, biases, layerParam.name().c_str());
    }
    else
    {
        returnLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, layerParam.name().c_str());
    }

    if (!returnLayer)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to create depthwise convolution layer. "
                    "Layer=%1% #filters=%2% %3%") %
                    layerParam.name() %
                    numFilters %
                    CHECK_LOCATION().AsString()));
    }
    armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
    inputConnection.Connect(returnLayer->GetInputSlot(0));
    returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseConvLayer(const LayerParameter& layerParam)
{
    // Ignored Caffe Parameters:
    // * Dilation Size
    // * Weight Filler
    // * Bias Filler
    // * Engine
    // * Force nd_im2col
    // * Axis

    // Not available ArmNN interface parameters:
    // * Rounding policy

    BOOST_ASSERT(layerParam.type() == "Convolution");
    ValidateNumInputsOutputs(layerParam, 1, 1);

    ConvolutionParameter convParam = layerParam.convolution_param();
    BlobShape inputShape = TensorDescToBlobShape(GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo());
    const unsigned int numGroups = convParam.has_group() ? convParam.group() : 1;
    unsigned int numFilters = convParam.num_output();

    const auto notFound = std::numeric_limits<unsigned int>::max();

    unsigned int kernelH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                             kernel_h, kernel_size, unsigned int, notFound);
    unsigned int kernelW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                             kernel_w, kernel_size, unsigned int, notFound);

    unsigned int strideH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                             stride_h, stride, unsigned int, 1u);
    unsigned int strideW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                             stride_w, stride, unsigned int, 1u);

    unsigned int padH = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                          pad_h, pad, unsigned int, 0u);
    unsigned int padW = GET_OPTIONAL_WITH_VECTOR_FALLBACK(convParam, ConvolutionParameter,
                                                          pad_w, pad, unsigned int, 0u);

    Convolution2dDescriptor convolution2dDescriptor;
    convolution2dDescriptor.m_PadLeft     = padW;
    convolution2dDescriptor.m_PadRight    = padW;
    convolution2dDescriptor.m_PadTop      = padH;
    convolution2dDescriptor.m_PadBottom   = padH;
    convolution2dDescriptor.m_StrideX     = strideW;
    convolution2dDescriptor.m_StrideY     = strideH;
    convolution2dDescriptor.m_BiasEnabled = convParam.has_bias_term() ? convParam.bias_term() : true;

    if (numGroups > numFilters)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Error parsing Convolution: %1%. "
                    "The 'group'=%2% parameter cannot be larger than the "
                    "number of filters supplied ='%3%'. %4%") %
                    layerParam.name() %
                    numGroups %
                    numFilters %
                    CHECK_LOCATION().AsString()));
    }

    if (inputShape.dim_size() != 4)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Convolution input shape is expected to have 4 dimensions. "
                    "%1%'s input has only %2%. %3%") %
                    layerParam.name() %
                    inputShape.dim_size() %
                    CHECK_LOCATION().AsString()));
    }

    if (numGroups > 1)
    {
        if (numGroups > inputShape.dim(1))
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Error parsing Convolution: %1%. "
                        "The 'group'=%2% parameter cannot be larger than the "
                        "channel of the input shape=%3% (in NCHW format). %4%") %
                        layerParam.name() %
                        numGroups %
                        inputShape.dim(1) %
                        CHECK_LOCATION().AsString()));
        }
        else if (numGroups == inputShape.dim(1))
        {
            // We use a depthwise convolution here, because the number of groups equals
            // the number of input channels.
            AddConvLayerWithDepthwiseConv(layerParam, convolution2dDescriptor, kernelW, kernelH);
            return;
        }
        else
        {
            // We split the input along the channel dimension into 'group' separate convolutions
            // (each seeing channels/groups input channels) and merge the results afterwards.
            AddConvLayerWithSplits(layerParam, convolution2dDescriptor, kernelW, kernelH);
            return;
        }
    }

    // NOTE: at this point we only need to handle the #group=1 case; all other cases
    // are handled by the AddConvLayer* helpers above.

    // Populate convolution output tensor descriptor dimensions.
    BlobShape outputShape;
    outputShape.add_dim(0);
    outputShape.set_dim(0, inputShape.dim(0));
    outputShape.add_dim(1);
    outputShape.set_dim(1, numFilters);
    outputShape.add_dim(2);
    outputShape.set_dim(
        2, (static_cast<int>(
            static_cast<float>(inputShape.dim(2) + 2 * padH - kernelH) /
            static_cast<float>(strideH)) + 1));
    outputShape.add_dim(3);
    outputShape.set_dim(
        3, (static_cast<int>(
            static_cast<float>(inputShape.dim(3) + 2 * padW - kernelW) /
            static_cast<float>(strideW)) + 1));

    // Load the weight data.
    vector<float> weightData(boost::numeric_cast<size_t>(inputShape.dim(1) *
                                                         outputShape.dim(1) *
                                                         kernelH *
                                                         kernelW));
    GetDataFromBlob(layerParam, weightData, 0);

    const unsigned int weightDimSizes[4] = {
        static_cast<unsigned int>(outputShape.dim(1)), // output channels
        static_cast<unsigned int>(inputShape.dim(1)),  // input channels
        kernelH,
        kernelW};

    armnn::IConnectableLayer* returnLayer = nullptr;

    // Pull out the weights loaded from the model file earlier.
    ConstTensor weights(TensorInfo(4, weightDimSizes, DataType::Float32), weightData.data());

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        TensorInfo biasInfo;
        vector<float> biasData;

        biasData.resize(boost::numeric_cast<size_t>(outputShape.dim(1)), 1.f);
        GetDataFromBlob(layerParam, biasData, 1);

        const unsigned int biasDimSizes[1] = {static_cast<unsigned int>(outputShape.dim(1))};
        biasInfo = TensorInfo(1, biasDimSizes, DataType::Float32);

        // Pull out the biases loaded from the model file earlier.
        ConstTensor biases(biasInfo, biasData.data());

        returnLayer =
            m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, biases, layerParam.name().c_str());
    }
    else
    {
        returnLayer = m_Network->AddConvolution2dLayer(convolution2dDescriptor, weights, layerParam.name().c_str());
    }

    if (!returnLayer)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to create Convolution layer. "
                    "Layer=%1% #groups=%2% #filters=%3% %4%") %
                    layerParam.name() %
                    numGroups %
                    numFilters %
                    CHECK_LOCATION().AsString()));
    }

    armnn::IOutputSlot& inputConnection = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
    inputConnection.Connect(returnLayer->GetInputSlot(0));
    returnLayer->GetOutputSlot(0).SetTensorInfo(BlobShapeToTensorInfo(outputShape));

    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), returnLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParsePoolingLayer(const LayerParameter& layerParam)
{
    // Ignored Caffe Parameters:
    //      Stochastic Pooling
    //      Engine

    ValidateNumInputsOutputs(layerParam, 1, 1);
    PoolingParameter param = layerParam.pooling_param();
    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    const auto notFound = std::numeric_limits<unsigned int>::max();

    unsigned int kernel_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                       kernel_h, kernel_size, unsigned int, notFound);
    unsigned int kernel_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                       kernel_w, kernel_size, unsigned int, notFound);

    if ((kernel_h == notFound || kernel_w == notFound) && param.has_global_pooling())
    {
        kernel_h = inputInfo.GetShape()[2];
        kernel_w = inputInfo.GetShape()[3];
    }

    VALIDATE_EQUAL_VALUES_IN_RANGE(kernel_h, kernel_w, 0, 11);

    unsigned int stride_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                       stride_h, stride, unsigned int, notFound);
    unsigned int stride_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                       stride_w, stride, unsigned int, notFound);

    if ((stride_h == notFound || stride_w == notFound) && param.has_global_pooling())
    {
        stride_h = 1;
        stride_w = 1;
    }

    VALIDATE_EQUAL_VALUES_IN_RANGE(stride_h, stride_w, 0, 11);

    unsigned int pad_h = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                    pad_h, pad, unsigned int, 0u);
    unsigned int pad_w = GET_OPTIONAL_WITH_FALLBACK(param, PoolingParameter,
                                                    pad_w, pad, unsigned int, 0u);

    VALIDATE_EQUAL_VALUES_IN_RANGE(pad_h, pad_w, 0, 11);

    // Populate the pooling descriptor.
    Pooling2dDescriptor pooling2dDescriptor;
    if (param.has_pool())
    {
        PoolingParameter_PoolMethod p = param.pool();
        switch (p)
        {
            case PoolingParameter_PoolMethod_MAX:
            {
                pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Max;
                break;
            }
            case PoolingParameter_PoolMethod_AVE:
            {
                pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
                break;
            }
            case PoolingParameter_PoolMethod_STOCHASTIC:
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "Pooling Layer: Stochastic Pooling Not Supported. Layer=%1% %2%") %
                            layerParam.name() %
                            CHECK_LOCATION().AsString()));
            }
            default:
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "Pooling Layer: unknown pooling method: %1% for layer: %2% %3%") %
                            p %
                            layerParam.name() %
                            CHECK_LOCATION().AsString()));
            }
        }
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "No Pooling Method Defined for %1% %2%") %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }

    pooling2dDescriptor.m_PadLeft    = pad_w;
    pooling2dDescriptor.m_PadRight   = pad_w;
    pooling2dDescriptor.m_PadTop     = pad_h;
    pooling2dDescriptor.m_PadBottom  = pad_h;
    pooling2dDescriptor.m_StrideX    = stride_w;
    pooling2dDescriptor.m_StrideY    = stride_h;
    pooling2dDescriptor.m_PoolWidth  = kernel_w;
    pooling2dDescriptor.m_PoolHeight = kernel_h;

    pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Ceiling;
    pooling2dDescriptor.m_PaddingMethod       = PaddingMethod::IgnoreValue;

    armnn::IConnectableLayer* poolingLayer = m_Network->AddPooling2dLayer(pooling2dDescriptor,
                                                                          layerParam.name().c_str());

    TensorInfo outputInfo(
        { inputInfo.GetShape()[0],
          inputInfo.GetShape()[1],
          static_cast<unsigned int>(ceil(
              static_cast<float>(inputInfo.GetShape()[2] + 2 * pad_h - kernel_h) /
              boost::numeric_cast<float>(stride_h))) + 1,
          static_cast<unsigned int>(ceil(
              static_cast<float>(inputInfo.GetShape()[3] + 2 * pad_w - kernel_w) /
              boost::numeric_cast<float>(stride_w))) + 1 },
        DataType::Float32);
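
    // Note: unlike the convolution output-size computation earlier in this file, pooling
    // uses ceiling rounding (matching m_OutputShapeRounding = Ceiling above and Caffe's
    // own convention):
    //
    //     outH = ceil((inH + 2 * pad_h - kernel_h) / stride_h) + 1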

    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(poolingLayer->GetInputSlot(0));
    poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), poolingLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseReluLayer(const LayerParameter& layerParam)
{
    ValidateNumInputsOutputs(layerParam, 1, 1);

    const string& name = layerParam.name();
    const ReLUParameter& param = layerParam.relu_param();

    ActivationDescriptor activationDescriptor;
    const float negativeSlope = param.negative_slope();
    if (negativeSlope == 0.0f)
    {
        activationDescriptor.m_Function = ActivationFunction::ReLu;
    }
    else
    {
        activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
        activationDescriptor.m_A = negativeSlope;
    }

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();
    IConnectableLayer* const activationLayer = m_Network->AddActivationLayer(activationDescriptor, name.c_str());
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), activationLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseLRNLayer(const LayerParameter& layerParam)
{
    ValidateNumInputsOutputs(layerParam, 1, 1);

    LRNParameter param = layerParam.lrn_param();

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    // Ignored BATCH NORMALIZATION Caffe parameters.
    // Ignored MVN Caffe parameters.
    // Ignored LRN Caffe parameters:
    //      Engine

    NormalizationDescriptor normalizationDescriptor;
    if (param.has_norm_region())
    {
        LRNParameter_NormRegion n = param.norm_region();
        switch (n)
        {
            case LRNParameter_NormRegion_ACROSS_CHANNELS:
            {
                normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
                break;
            }
            case LRNParameter_NormRegion_WITHIN_CHANNEL:
            {
                normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Within;
                break;
            }
            default:
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "Unknown region %1% for LRN layer %2% %3%") %
                            n %
                            layerParam.name() %
                            CHECK_LOCATION().AsString()));
            }
        }
    }
    else
    {
        // Caffe defaults to normalization across channels.
        normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
    }

    normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
    if (param.has_local_size())
    {
        normalizationDescriptor.m_NormSize = param.local_size();
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "local_size not defined for LRN layer %1% %2%") %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }

    if (param.has_alpha())
    {
        normalizationDescriptor.m_Alpha = param.alpha();
        normalizationDescriptor.m_Alpha /= boost::numeric_cast<float>(param.local_size());
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Alpha not defined for LRN layer %1% %2%") %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }
    if (param.has_beta())
    {
        normalizationDescriptor.m_Beta = param.beta();
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Beta not defined for LRN layer %1% %2%") %
                    layerParam.name() %
                    CHECK_LOCATION().AsString()));
    }

    if (param.has_k())
    {
        normalizationDescriptor.m_K = param.k();
    }
    else
    {
        normalizationDescriptor.m_K = 1;
    }

    IConnectableLayer* const normLayer = m_Network->AddNormalizationLayer(normalizationDescriptor,
                                                                          layerParam.name().c_str());
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(normLayer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), normLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseInnerProductLayer(const LayerParameter& layerParam)
{
    InnerProductParameter param = layerParam.inner_product_param();

    ValidateNumInputsOutputs(layerParam, 1, 1);

    unsigned int outputSize = param.num_output();

    // Ignored Caffe Parameters:
    //      Weight Filler
    //      Bias Filler
    //      Engine
    //      Axis

    FullyConnectedDescriptor tensorFullyConnectedDescriptor;

    if (param.has_transpose())
    {
        // If true, assumes transposed weights.
        tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = param.transpose();
    }
    else
    {
        // Caffe defaults to transposed.
        tensorFullyConnectedDescriptor.m_TransposeWeightMatrix = true;
    }

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    TensorInfo weightInfo;
    TensorInfo biasInfo;

    // Allows implicit flattening of extra dimensions.
    unsigned int inputSize = inputInfo.GetShape()[1];
    for (unsigned int i = 2; i < inputInfo.GetNumDimensions(); ++i)
    {
        inputSize *= inputInfo.GetShape()[i];
    }
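
    // e.g. a 4-D input of shape [N, C, H, W] is treated as [N, C*H*W] before the fully
    // connected layer, mirroring Caffe's implicit flattening from axis 1 onwards.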

    const float* weightDataPtr = GetArrayPtrFromBlob(layerParam, 0);
    const unsigned int swTD[2] = { outputSize, inputSize };
    ConstTensor weights(TensorInfo(2, swTD, DataType::Float32), weightDataPtr);

    tensorFullyConnectedDescriptor.m_BiasEnabled = true;
    // TODO: check whether bias is actually enabled in the model.
    armnn::IConnectableLayer* fullyConnectedLayer = nullptr;
    if (tensorFullyConnectedDescriptor.m_BiasEnabled)
    {
        // BIAS VALUE
        const float* biasDataPtr = GetArrayPtrFromBlob(layerParam, 1);

        const unsigned int sbTD[1] = { outputSize };

        ConstTensor biases(TensorInfo(1, sbTD, DataType::Float32), biasDataPtr);

        fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights, biases,
                                                                layerParam.name().c_str());
    }
    else
    {
        fullyConnectedLayer = m_Network->AddFullyConnectedLayer(tensorFullyConnectedDescriptor, weights,
                                                                layerParam.name().c_str());
    }

    TensorInfo outputInfo({ inputInfo.GetShape()[0], outputSize }, DataType::Float32);
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(fullyConnectedLayer->GetInputSlot(0));
    fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), fullyConnectedLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseSoftmaxLayer(const LayerParameter& layerParam)
{
    ValidateNumInputsOutputs(layerParam, 1, 1);

    SoftmaxParameter param = layerParam.softmax_param();

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    // Ignored Caffe Parameters:
    //      axis
    //      Engine

    armnn::SoftmaxDescriptor softmaxDescriptor;
    armnn::IConnectableLayer* const softmaxLayer = m_Network->AddSoftmaxLayer(
        softmaxDescriptor,
        layerParam.name().c_str());
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(softmaxLayer->GetInputSlot(0));
    softmaxLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), softmaxLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseEltwiseLayer(const LayerParameter& layerParam)
{
    ValidateNumInputsOutputs(layerParam, 2, 1);

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    // Ignored Caffe Parameters:
    //      coeff

    EltwiseParameter_EltwiseOp operation = EltwiseParameter_EltwiseOp_SUM; // Defaults to sum, as per Caffe.

    if (layerParam.has_eltwise_param() && layerParam.eltwise_param().has_operation())
    {
        operation = layerParam.eltwise_param().operation();
    }

    armnn::IConnectableLayer* newLayer = nullptr;
    switch (operation)
    {
        case EltwiseParameter_EltwiseOp_SUM:
        {
            newLayer = m_Network->AddAdditionLayer(layerParam.name().c_str());
            break;
        }
        case EltwiseParameter_EltwiseOp_PROD:
        {
            newLayer = m_Network->AddMultiplicationLayer(layerParam.name().c_str());
            break;
        }
        default:
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Unsupported operation %1% in Eltwise layer %2% %3%") %
                        operation %
                        layerParam.name() %
                        CHECK_LOCATION().AsString()));
        }
    }

    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(newLayer->GetInputSlot(0));
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(1)).Connect(newLayer->GetInputSlot(1));
    newLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), newLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseConcatLayer(const LayerParameter& layerParam)
{
    unsigned int numInputs = static_cast<unsigned int>(layerParam.bottom_size());
    // We assume concat happens along the channel dimension, which is 1 in (0, 1, 2, 3).
    unsigned int concatDim = 1;
    unsigned int numOfDims = 4;

    // We only consider 4-D tensors here.
    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numInputs), numOfDims);
    std::vector<unsigned int> mergeDimSizes(numOfDims, 0u);

    unsigned int mergeDim = 0;
    for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
    {
        const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(
            layerParam.bottom(boost::numeric_cast<int>(viewIndex))).GetTensorInfo();
        // Checks whether the dimensions of the input tensors are actually 4.
        if (inputInfo.GetNumDimensions() != 4)
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "The number of dimensions for input tensors of "
                        "the concatenation op should be 4. An input of %1% has "
                        "%2% dimensions. %3%") %
                        layerParam.name() %
                        inputInfo.GetNumDimensions() %
                        CHECK_LOCATION().AsString()));
        }

        mergeDimSizes[0] = inputInfo.GetShape()[0];
        mergeDimSizes[1] = inputInfo.GetShape()[1];
        mergeDimSizes[2] = inputInfo.GetShape()[2];
        mergeDimSizes[3] = inputInfo.GetShape()[3];

        for (unsigned int j = 0; j < concatDim; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }

        concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
        mergeDim += mergeDimSizes[concatDim];

        for (unsigned int j = concatDim + 1; j < numOfDims; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }
    }
    mergeDimSizes[concatDim] = mergeDim;
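
    // Illustrative sketch: concatenating two NCHW inputs with C1 and C2 channels gives
    // view origins (0, 0, 0, 0) and (0, C1, 0, 0), and an output with C1 + C2 channels.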

    armnn::IConnectableLayer* concatlayer = m_Network->AddMergerLayer(concatDescriptor, layerParam.name().c_str());
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(boost::numeric_cast<int>(i)));
        outputSlot.Connect(concatlayer->GetInputSlot(i));
    }

    concatlayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(numOfDims, mergeDimSizes.data(), DataType::Float32));
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), concatlayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseBatchNormLayer(const LayerParameter& layerParam)
{
    ValidateNumInputsOutputs(layerParam, 1, 1);

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    string name = layerParam.name();

    BatchNormParameter param = layerParam.batch_norm_param();
    // If use_global_stats is not explicitly set in the model, assume it to be true (its default value
    // when the network is in the testing phase).
    if (param.has_use_global_stats())
    {
        if (!param.use_global_stats())
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Error parsing Batch Norm layer '%1%': "
                        "Parameter 'use_global_stats' is set to false, which is "
                        "unsupported (value used for training). %2%") %
                        name %
                        CHECK_LOCATION().AsString()));
        }
    }

    BatchNormalizationDescriptor desc;
    desc.m_Eps = param.eps();

    unsigned int channels = inputInfo.GetShape()[1];
    unsigned int shape[] = {channels};

    vector<float> meanData(channels);
    GetDataFromBlob(layerParam, meanData, 0);

    vector<float> varianceData(channels);
    GetDataFromBlob(layerParam, varianceData, 1);

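    // Caffe's BatchNorm layer stores unnormalised running statistics: blobs 0 and 1 hold the
    // accumulated mean and variance, and blob 2 holds a single accumulation (moving average)
    // factor. The true statistics are blob(0)/blob(2)[0] and blob(1)/blob(2)[0]; e.g. if
    // blob(2)[0] is 99.0, every stored value is 99 times the statistic we actually want.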
    // Reads the moving average factor and applies scaling (if required).
    const BlobProto& blob = layerParam.blobs(2);
    const float movingAverageFactor = blob.data(0);
    if (movingAverageFactor != 0.0f)
    {
        const float scaleFactor = 1.0f / movingAverageFactor;
        auto scaleFunction = [scaleFactor](float f) -> float { return f * scaleFactor; };

        std::transform(varianceData.begin(), varianceData.end(), varianceData.begin(), scaleFunction);
        std::transform(meanData.begin(), meanData.end(), meanData.begin(), scaleFunction);
    }

    // Identity scale operation: Caffe's BatchNorm layer does not apply scale/shift itself
    // (that is the job of the separate Scale layer), so beta is 0 and gamma is 1.
    vector<float> betaData(channels, 0.0f);
    vector<float> gammaData(channels, 1.0f);
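    // With these values the ArmNN layer computes, per channel,
    //     out = gamma * (in - mean) / sqrt(variance + eps) + beta
    // which reduces to plain normalisation by the global statistics loaded above.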

    ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
    ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
    ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
    ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);

    armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
        mean, variance, beta, gamma, name.c_str());
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
    batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseScaleLayer(const LayerParameter& layerParam)
{
    // Current suboptimal solution: add a batch normalization layer with 0 mean and 1 variance.
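    // With mean 0, variance 1 and eps 0, the batch norm formula above reduces to
    //     out = gamma * in + beta
    // which is exactly a per-channel scale with an optional bias.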
    ValidateNumInputsOutputs(layerParam, 1, 1);

    const TensorInfo& inputInfo = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).GetTensorInfo();

    string name = layerParam.name();

    ScaleParameter param = layerParam.scale_param();
    if (param.axis() != 1)
    {
        // Would have to use something other than BatchNormalizationLayer in this case.
        throw ParseException(
            boost::str(
                boost::format(
                    "Loading Scale Layer: Only axis 1 is supported currently. "
                    "Layer=%1% Axis=%2% %3%") %
                    layerParam.name() %
                    param.axis() %
                    CHECK_LOCATION().AsString()));
    }

    unsigned int channels = inputInfo.GetShape()[1];
    unsigned int shape[] = {channels};

    BatchNormalizationDescriptor desc;
    desc.m_Eps = 0.0f; // Don't need epsilon if variance is 1.
    vector<float> meanData(channels, 0.0f);
    vector<float> varianceData(channels, 1.0f);
    vector<float> betaData(channels, 0.0f);
    vector<float> gammaData(channels);

    GetDataFromBlob(layerParam, gammaData, 0);

    // bias_term defaults to false in caffe.proto, so test the field's value rather than its
    // presence; the beta blob only exists when bias_term is actually true.
    if (param.bias_term())
    {
        GetDataFromBlob(layerParam, betaData, 1);
    }

    ConstTensor mean(TensorInfo(1, shape, armnn::DataType::Float32), meanData);
    ConstTensor variance(TensorInfo(1, shape, armnn::DataType::Float32), varianceData);
    ConstTensor beta(TensorInfo(1, shape, armnn::DataType::Float32), betaData);
    ConstTensor gamma(TensorInfo(1, shape, armnn::DataType::Float32), gammaData);

    armnn::IConnectableLayer* const batchNormLayer = m_Network->AddBatchNormalizationLayer(desc,
        mean, variance, beta, gamma, name.c_str());
    GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)).Connect(batchNormLayer->GetInputSlot(0));
    batchNormLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), batchNormLayer->GetOutputSlot(0));
}

void CaffeParserBase::ParseSplitLayer(const caffe::LayerParameter& layerParam)
{
    // Used in Caffe to duplicate memory; not necessary in ArmNN.
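    // An ArmNN output slot can feed any number of input slots, so every top of the Split
    // layer can simply alias the output slot that produces the single bottom.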
    if (layerParam.bottom_size() != 1)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Split layer '%1%' should have exactly 1 bottom. "
                    "#bottoms=%2% %3%") %
                    layerParam.name() %
                    layerParam.bottom_size() %
                    CHECK_LOCATION().AsString()));
    }
    armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0));
    for (int i = 0; i < layerParam.top_size(); i++)
    {
        SetArmnnOutputSlotForCaffeTop(layerParam.top(i), outputSlot);
    }
}

void CaffeParserBase::ParseDropoutLayer(const caffe::LayerParameter& layerParam)
{
    // Ignored for inference, so patch the single input to its single output.
    if (layerParam.bottom_size() != 1 || layerParam.top_size() != 1)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Dropout layer '%1%' should have exactly 1 bottom and 1 top. "
                    "#bottoms=%2% #tops=%3% %4%") %
                    layerParam.name() %
                    layerParam.bottom_size() %
                    layerParam.top_size() %
                    CHECK_LOCATION().AsString()));
    }
    SetArmnnOutputSlotForCaffeTop(layerParam.top(0), GetArmnnOutputSlotForCaffeTop(layerParam.bottom(0)));
}

void CaffeParserBase::TrackInputBinding(armnn::IConnectableLayer* layer,
                                        armnn::LayerBindingId id,
                                        const armnn::TensorInfo& tensorInfo)
{
    return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkInputsBindingInfo);
}

void CaffeParserBase::TrackOutputBinding(armnn::IConnectableLayer* layer,
                                         armnn::LayerBindingId id,
                                         const armnn::TensorInfo& tensorInfo)
{
    return TrackBindingPoint(layer, id, tensorInfo, layer->GetName(), m_NetworkOutputsBindingInfo);
}

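// Records a (binding id, tensor info) pair against the layer name so that callers can later
// look up where to bind their input/output buffers when running inference on the network.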
void CaffeParserBase::TrackBindingPoint(armnn::IConnectableLayer* layer,
                                        armnn::LayerBindingId id,
                                        const armnn::TensorInfo& tensorInfo,
                                        const char* bindingPointDesc,
                                        std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
{
    const std::string layerName = layer->GetName();
    auto it = nameToBindingInfo.find(layerName);
    if (it == nameToBindingInfo.end())
    {
        nameToBindingInfo[layerName] = std::make_pair(id, tensorInfo);
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Name '%1%' (id %2%) used by more than one %3% layer %4%") %
                    layerName %
                    id %
                    bindingPointDesc %
                    CHECK_LOCATION().AsString()));
    }
}

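// Every parsed layer registers the ArmNN output slot that produces each of its Caffe tops;
// later layers then resolve their bottoms through this lookup.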
armnn::IOutputSlot& CaffeParserBase::GetArmnnOutputSlotForCaffeTop(const std::string& caffeTopName) const
{
    auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
    if (it != m_ArmnnOutputSlotForCaffeTop.end())
    {
        return *it->second;
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Could not find armnn output slot for Caffe top '%1%' %2%") %
                    caffeTopName %
                    CHECK_LOCATION().AsString()));
    }
}

void CaffeParserBase::SetArmnnOutputSlotForCaffeTop(
    const std::string& caffeTopName, armnn::IOutputSlot& armnnOutputSlot)
{
    auto it = m_ArmnnOutputSlotForCaffeTop.find(caffeTopName);
    if (it == m_ArmnnOutputSlotForCaffeTop.end())
    {
        m_ArmnnOutputSlotForCaffeTop[caffeTopName] = &armnnOutputSlot;
    }
    else
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Attempting to add duplicate entry for Caffe top '%1%' %2%") %
                    caffeTopName %
                    CHECK_LOCATION().AsString()));
    }
}

// Note: can move to CaffeParser when/if we optimise the text/string format
// to load on a layer-by-layer basis.
void CaffeParserBase::ResolveInPlaceLayers(caffe::NetParameter& netParameter)
{
    // Finds layers with the same top.
    std::map<std::string, std::vector<caffe::LayerParameter*>> layersByTop;
    for (int layerIdx = 0; layerIdx < netParameter.layer_size(); ++layerIdx)
    {
        caffe::LayerParameter& layer = *netParameter.mutable_layer(layerIdx);
        for (int i = 0; i < layer.top_size(); ++i)
        {
            layersByTop[layer.top(i)].push_back(&layer);
        }
    }

    // For each set of layers with the same top, resolves them to a linear chain rather than in-place layers.
    // Note that for 'regular' layers, there will be a single layer in each group and so this will be a no-op.
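    // For example, conv1 (top: "conv1") followed by an in-place relu1 (bottom: "conv1",
    // top: "conv1") is rewritten so that conv1 produces "conv1_top", relu1 consumes
    // "conv1_top", and relu1 keeps the original top "conv1" for later layers to reference.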
    for (const auto& layersWithSameTopIt : layersByTop)
    {
        const std::string& top = layersWithSameTopIt.first;
        const std::vector<caffe::LayerParameter*>& layersWithSameTop = layersWithSameTopIt.second;

        // Chains the layers together in the order that they are listed in the prototxt; Caffe
        // executes layers in that order, so the chain preserves the in-place semantics.
        // Note that the last layer will not have its top modified so that other layers will continue to reference it.
        for (unsigned int layerIdx = 0; layerIdx < layersWithSameTop.size() - 1; ++layerIdx)
        {
            caffe::LayerParameter& layer1 = *layersWithSameTop[layerIdx];
            caffe::LayerParameter& layer2 = *layersWithSameTop[layerIdx + 1];
            if (layer1.top_size() != 1)
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "Node '%1%' is an in-place layer but doesn't have exactly one "
                            "top. It has %2% instead. %3%") %
                            layer1.name() %
                            layer1.top_size() %
                            CHECK_LOCATION().AsString()));
            }
            std::string newTop = layer1.name() + "_top";
            layer1.set_top(0, newTop);
            if (layer2.bottom_size() != 1 || layer2.bottom(0) != top)
            {
                throw ParseException(
                    boost::str(
                        boost::format(
                            "Node '%1%' is an in-place layer but "
                            "doesn't have exactly one bottom, or it doesn't match its top. "
                            "#bottoms=%2%, first bottom is %3%, top is %4% %5%") %
                            layer2.name() %
                            layer2.bottom_size() %
                            (layer2.bottom_size() > 0 ? layer2.bottom(0) : "<none>") %
                            top %
                            CHECK_LOCATION().AsString()));
            }
            layer2.set_bottom(0, newTop);
        }
    }
}

// Note: can move to CaffeParser when/if we optimise the text/string format
// to load on a layer-by-layer basis.
void CaffeParserBase::LoadNetParam(NetParameter& netParameter)
{
    // Caffe models sometimes have an implicit input layer.
    // In that case, add an explicit one.
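    // This covers the legacy prototxt style that declares the input at net level, e.g.
    //     input: "data"
    //     input_dim: 1  input_dim: 3  input_dim: 224  input_dim: 224
    // (example values); it is converted into an equivalent layer of type "Input".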
    if (netParameter.input_size() > 0)
    {
        LayerParameter* newLayer = netParameter.add_layer();

        newLayer->set_type("Input");
        newLayer->set_name(netParameter.input(0));
        newLayer->add_top(netParameter.input(0));

        InputParameter* inputParam = newLayer->mutable_input_param();
        BlobShape* shape = inputParam->add_shape();

        int dimSize = netParameter.input_dim_size();
        for (int i = 0; i < dimSize; ++i)
        {
            shape->add_dim(netParameter.input_dim(i));
        }
    }

    // Replaces in-place layers with regular ones to make the rest of the parsing easier.
    ResolveInPlaceLayers(netParameter);

    // Creates a lookup of Caffe layers by top name.
    for (int i = 0; i < netParameter.layer_size(); ++i)
    {
        const caffe::LayerParameter& layer = netParameter.layer(i);
        for (int topIdx = 0; topIdx < layer.top_size(); ++topIdx)
        {
            m_CaffeLayersByTopName[layer.top(topIdx)] = &layer;
        }
    }

    // Finds the output layers the user requested.
    std::vector<const caffe::LayerParameter*> targetLayers;
    for (const std::string& requestedOutputName : m_RequestedOutputs)
    {
        auto nodeIt = m_CaffeLayersByTopName.find(requestedOutputName);
        if (nodeIt == m_CaffeLayersByTopName.end())
        {
            throw ParseException(
                boost::str(
                    boost::format(
                        "Couldn't find requested output layer '%1%' in graph %2%") %
                        requestedOutputName %
                        CHECK_LOCATION().AsString()));
        }
        targetLayers.push_back(nodeIt->second);
    }

    // Sorts them into a linear ordering such that all inputs of a node are before the node itself.
    std::vector<const caffe::LayerParameter*> sortedNodes;
    if (!armnnUtils::GraphTopologicalSort<const caffe::LayerParameter*>(
        targetLayers,
        [this](const caffe::LayerParameter* node)
        {
            return GetInputs(*node);
        },
        sortedNodes))
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Cycle detected in graph. #nodes: %1% %2%") %
                    sortedNodes.size() %
                    CHECK_LOCATION().AsString()));
    }

    // Parses each node in order, knowing that all inputs of a node will be processed before the node itself.
    for (const caffe::LayerParameter* current : sortedNodes)
    {
        auto it = ms_CaffeLayerNameToParsingFunctions.find(current->type());
        if (it == ms_CaffeLayerNameToParsingFunctions.end())
        {
            throw ParseException(
                boost::str(
                    boost::format("Unsupported layer type: '%1%' for layer %2% %3%") %
                        current->type() %
                        current->name() %
                        CHECK_LOCATION().AsString()));
        }
        auto func = it->second;
        (this->*func)(*current);
    }

    // Adds ArmNN output layers connected to each requested output.
    for (const std::string& requestedOutput : m_RequestedOutputs)
    {
        armnn::IOutputSlot& outputSlot = GetArmnnOutputSlotForCaffeTop(requestedOutput);

        const armnn::LayerBindingId outputId = boost::numeric_cast<armnn::LayerBindingId>(
            m_NetworkOutputsBindingInfo.size());
        armnn::IConnectableLayer* const outputLayer = m_Network->AddOutputLayer(outputId, requestedOutput.c_str());
        outputSlot.Connect(outputLayer->GetInputSlot(0));

        TrackOutputBinding(outputLayer, outputId, outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo());
    }
}

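// Typical usage (a sketch only; "model.prototxt", the input name/shape and the output name
// are hypothetical and depend on the model being loaded):
//     CaffeParser parser;
//     std::map<std::string, armnn::TensorShape> inputShapes{
//         { "data", armnn::TensorShape({ 1, 3, 224, 224 }) } };
//     armnn::INetworkPtr network = parser.CreateNetworkFromTextFile(
//         "model.prototxt", inputShapes, { "prob" });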
INetworkPtr CaffeParserBase::CreateNetworkFromTextFile(const char* graphFile,
    const std::map<std::string, armnn::TensorShape>& inputShapes,
    const std::vector<std::string>& requestedOutputs)
{
    FILE* fd = fopen(graphFile, "r");

    if (fd == nullptr)
    {
        throw FileNotFoundException(
            boost::str(
                boost::format(
                    "Failed to open graph file: %1% %2%") %
                    graphFile %
                    CHECK_LOCATION().AsString()));
    }

    // Parses the file into a message.
    NetParameter netParam;
    bool success = false;
    {
        // Scoped so the stream is destroyed before the underlying file is closed.
        google::protobuf::io::FileInputStream input(fileno(fd));
        success = google::protobuf::TextFormat::Parse(&input, &netParam);
    }
    fclose(fd);

    if (!success)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to parse graph file: %1% %2%") %
                    graphFile %
                    CHECK_LOCATION().AsString()));
    }

    return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
}

INetworkPtr CaffeParserBase::CreateNetworkFromString(const char* protoText,
    const std::map<std::string, armnn::TensorShape>& inputShapes,
    const std::vector<std::string>& requestedOutputs)
{
    // Parses the string into a message.
    NetParameter netParam;
    bool success = google::protobuf::TextFormat::ParseFromString(protoText, &netParam);

    if (!success)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to parse graph string %1%") %
                    CHECK_LOCATION().AsString()));
    }

    return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
}

INetworkPtr CaffeParser::CreateNetworkFromBinaryFile(const char* graphFile,
    const std::map<std::string, armnn::TensorShape>& inputShapes,
    const std::vector<std::string>& requestedOutputs)
{
    FILE* fd = fopen(graphFile, "rb");

    if (fd == nullptr)
    {
        throw FileNotFoundException(
            boost::str(
                boost::format(
                    "Failed to open graph file at: %1% %2%") %
                    graphFile %
                    CHECK_LOCATION().AsString()));
    }

    // Parses the file into a message.
    NetParameter netParam;

    FileInputStream inStream(fileno(fd));
    CodedInputStream codedStream(&inStream);
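    // Protobuf imposes a 64MB default limit on the total bytes read from a coded stream;
    // binary .caffemodel weight files routinely exceed that, so raise the limit to INT_MAX.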
    codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
    bool success = netParam.ParseFromCodedStream(&codedStream);
    fclose(fd);

    if (!success)
    {
        throw ParseException(
            boost::str(
                boost::format(
                    "Failed to parse protobuf file: %1% %2%") %
                    graphFile %
                    CHECK_LOCATION().AsString()));
    }

    return CreateNetworkFromNetParameter(netParam, inputShapes, requestedOutputs);
}

// Note: can move to CaffeParser when/if we optimise the text/string format
// to load on a layer-by-layer basis.
INetworkPtr CaffeParserBase::CreateNetworkFromNetParameter(NetParameter& netParam,
    const std::map<std::string, armnn::TensorShape>& inputShapes,
    const std::vector<std::string>& requestedOutputs)
{
    m_NetworkInputsBindingInfo.clear();
    m_NetworkOutputsBindingInfo.clear();

    m_Network = INetwork::Create();

    m_InputShapes = inputShapes;
    if (requestedOutputs.empty())
    {
        throw ParseException("requestedOutputs must have at least one entry");
    }
    m_RequestedOutputs = requestedOutputs;

    try
    {
        LoadNetParam(netParam);
    }
    catch (const ParseException&)
    {
        Cleanup();
        throw; // Rethrows the original exception rather than a copy.
    }

    Cleanup();

    return std::move(m_Network);
}

void CaffeParserBase::Cleanup()
{
    // Cleanup, in case we reuse this parser.
    m_InputShapes.clear();
    m_RequestedOutputs.clear();
    m_ArmnnOutputSlotForCaffeTop.clear();
    // NOTE: when we get the text/string format
    // optimised for memory then this data structure can
    // also move to the CaffeParser class.
    m_CaffeLayersByTopName.clear();
}

} // namespace armnnCaffeParser