telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2017 Arm Ltd. All rights reserved. |
David Beck | ecb56cd | 2018-09-05 12:52:57 +0100 | [diff] [blame] | 3 | // SPDX-License-Identifier: MIT |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 4 | // |
| 5 | #pragma once |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 6 | #include "armnn/ArmNN.hpp" |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 7 | |
| 8 | #if defined(ARMNN_TF_LITE_PARSER) |
| 9 | #include "armnnTfLiteParser/ITfLiteParser.hpp" |
| 10 | #endif |
| 11 | |
| 12 | #include <HeapProfiling.hpp> |
| 13 | #if defined(ARMNN_ONNX_PARSER) |
| 14 | #include "armnnOnnxParser/IOnnxParser.hpp" |
| 15 | #endif |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 16 | |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 17 | #include <boost/exception/exception.hpp> |
| 18 | #include <boost/exception/diagnostic_information.hpp> |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 19 | #include <boost/log/trivial.hpp> |
| 20 | #include <boost/format.hpp> |
| 21 | #include <boost/program_options.hpp> |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 22 | #include <boost/filesystem.hpp> |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 23 | |
| 24 | #include <map> |
| 25 | #include <string> |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 26 | #include <fstream> |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 27 | #include <type_traits> |
| 28 | |
| 29 | namespace InferenceModelInternal |
| 30 | { |
| 31 | // This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser |
| 32 | // definitions of BindingPointInfo gets consolidated. |
| 33 | using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>; |
| 34 | |
| 35 | using QuantizationParams = std::pair<float,int32_t>; |
| 36 | |
| 37 | struct Params |
| 38 | { |
| 39 | std::string m_ModelPath; |
| 40 | std::string m_InputBinding; |
| 41 | std::string m_OutputBinding; |
| 42 | const armnn::TensorShape* m_InputTensorShape; |
| 43 | std::vector<armnn::Compute> m_ComputeDevice; |
| 44 | bool m_EnableProfiling; |
| 45 | size_t m_SubgraphId; |
| 46 | bool m_IsModelBinary; |
| 47 | bool m_VisualizePostOptimizationModel; |
| 48 | bool m_EnableFp16TurboMode; |
| 49 | |
| 50 | Params() |
| 51 | : m_InputTensorShape(nullptr) |
| 52 | , m_ComputeDevice{armnn::Compute::CpuRef} |
| 53 | , m_EnableProfiling(false) |
| 54 | , m_SubgraphId(0) |
| 55 | , m_IsModelBinary(true) |
| 56 | , m_VisualizePostOptimizationModel(false) |
| 57 | , m_EnableFp16TurboMode(false) |
| 58 | {} |
| 59 | }; |
| 60 | |
| 61 | } // namespace InferenceModelInternal |
| 62 | |
| 63 | template <typename IParser> |
| 64 | struct CreateNetworkImpl |
| 65 | { |
| 66 | public: |
| 67 | using Params = InferenceModelInternal::Params; |
| 68 | using BindingPointInfo = InferenceModelInternal::BindingPointInfo; |
| 69 | |
| 70 | static armnn::INetworkPtr Create(const Params& params, |
| 71 | BindingPointInfo& inputBindings, |
| 72 | BindingPointInfo& outputBindings) |
| 73 | { |
| 74 | const std::string& modelPath = params.m_ModelPath; |
| 75 | |
| 76 | // Create a network from a file on disk |
| 77 | auto parser(IParser::Create()); |
| 78 | |
| 79 | std::map<std::string, armnn::TensorShape> inputShapes; |
| 80 | if (params.m_InputTensorShape) |
| 81 | { |
| 82 | inputShapes[params.m_InputBinding] = *params.m_InputTensorShape; |
| 83 | } |
| 84 | std::vector<std::string> requestedOutputs{ params.m_OutputBinding }; |
| 85 | armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}}; |
| 86 | |
| 87 | { |
| 88 | ARMNN_SCOPED_HEAP_PROFILING("Parsing"); |
| 89 | // Handle text and binary input differently by calling the corresponding parser function |
| 90 | network = (params.m_IsModelBinary ? |
| 91 | parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) : |
| 92 | parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs)); |
| 93 | } |
| 94 | |
| 95 | inputBindings = parser->GetNetworkInputBindingInfo(params.m_InputBinding); |
| 96 | outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding); |
| 97 | return network; |
| 98 | } |
| 99 | }; |
| 100 | |
#if defined(ARMNN_TF_LITE_PARSER)
/// TfLite specialization: the TfLite parser reads only binary (flatbuffer)
/// model files and resolves tensor bindings per subgraph id.
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str());
        }

        // TfLite bindings are looked up within the subgraph chosen by the caller.
        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);
        return network;
    }
};
#endif
| 132 | |
#if defined(ARMNN_ONNX_PARSER)
/// ONNX specialization: supports both binary and text model files, but the
/// ONNX parser takes no input-shape overrides or requested-output lists.
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            const char* modelFile = params.m_ModelPath.c_str();
            // Dispatch on the model format chosen by the caller.
            if (params.m_IsModelBinary)
            {
                network = parser->CreateNetworkFromBinaryFile(modelFile);
            }
            else
            {
                network = parser->CreateNetworkFromTextFile(modelFile);
            }
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#endif
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 166 | |
/// Builds the single-entry InputTensors list used by EnqueueWorkload from a
/// (binding id, tensor info) pair and a contiguous container of input data.
/// @param input            binding id and TensorInfo for the network input.
/// @param inputTensorData  container whose size() must equal the tensor's
///                         element count and whose data() is contiguous.
/// @throws armnn::Exception if the container size does not match the tensor.
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
    const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            // Include expected vs. actual element counts in the message to aid debugging.
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                "but got %2%.") % input.second.GetNumElements() % inputTensorData.size()));
        } catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here but boost::str and boost::format can both
            // throw uncaught exceptions, convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}
| 186 | |
| 187 | template<typename TContainer> |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 188 | inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output, |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 189 | TContainer& outputTensorData) |
| 190 | { |
| 191 | if (outputTensorData.size() != output.second.GetNumElements()) |
| 192 | { |
| 193 | throw armnn::Exception("Output tensor has incorrect size"); |
| 194 | } |
| 195 | return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } }; |
| 196 | } |
| 197 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 198 | |
| 199 | |
/// Loads a model file via IParser, optimizes it for the requested compute
/// devices, and loads it into an ArmNN runtime so it can be executed with
/// Run(). TDataType is the element type of the input/output containers.
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;

    // Options filled in from the command line by AddCommandLineOptions().
    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::Compute> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    /// Registers this model's command-line switches with the given
    /// boost::program_options description, binding them to 'options'.
    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::Compute>>(&options.m_ComputeDevice)->default_value
                ({armnn::Compute::CpuAcc, armnn::Compute::CpuRef}),
                "Which device to run layers on by default. Possible choices: CpuAcc, CpuRef, GpuAcc")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
             "Produce a dot file useful for visualizing the graph post optimization."
                "The file will have the same name as the model with the .dot extention.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

    /// Parses, optimizes and loads the network described by 'params'.
    /// @param params  model path, bindings and run options.
    /// @param runtime optional shared runtime; when null a private runtime is
    ///                created (with GPU profiling matching m_EnableProfiling).
    /// @throws armnn::Exception if optimization or network loading fails.
    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        // Parse the model file; also fills in the input/output binding info members.
        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo,
            m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        // Optionally dump the optimized graph next to the model file as <model>.dot.
        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(),file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            // LoadNetwork assigns m_NetworkIdentifier on success.
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    /// Number of elements in the network's output tensor.
    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }

    /// Runs a single inference. 'output' must already be sized to
    /// GetOutputSize(); when profiling is enabled the results are printed
    /// to stdout. Throws armnn::Exception if the workload fails.
    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const InferenceModelInternal::BindingPointInfo & GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo & GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    /// Quantization (scale, offset) of the output tensor.
    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }

private:
    // Network id assigned by IRuntime::LoadNetwork in the constructor.
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;

    // Forwards to the free MakeInputTensors with this model's input binding.
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    // Forwards to the free MakeOutputTensors with this model's output binding.
    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};