Jan Eilers | e38c418 | 2021-09-02 13:12:11 +0100 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2021 Arm Ltd and Contributors. All rights reserved. |
| 3 | // SPDX-License-Identifier: MIT |
| 4 | // |
| 5 | #include <armnn/INetwork.hpp> |
| 6 | #include <armnn/IRuntime.hpp> |
| 7 | #include <armnn/Utils.hpp> |
| 8 | #include <armnn/Descriptors.hpp> |
| 9 | |
| 10 | #include <iostream> |
| 11 | #include <thread> |
| 12 | |
| 13 | /// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an asynchronous |
| 14 | /// manner. |
| 15 | /// |
| 16 | /// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not |
| 17 | /// thread safe. Each workload has memory assigned to it which would be overwritten by each thread. |
| 18 | /// Before we added support for this you had to load a network multiple times to execute it at the |
| 19 | /// same time. Every time a network is loaded, it takes up memory on your device. Making the |
| 20 | /// execution thread safe helps to reduce the memory footprint for concurrent executions significantly. |
| 21 | /// This example shows you how to execute a model concurrently (multiple threads) while still only |
| 22 | /// loading it once. |
| 23 | /// |
| 24 | /// As in most of our simple samples, this example asks the user for a single input number for each |
| 25 | /// execution of the network. |
| 26 | /// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f |
| 27 | /// to produce an output number that is the same as the input. |
| 28 | int main() |
| 29 | { |
| 30 | using namespace armnn; |
| 31 | |
| 32 | // The first part of this code is very similar to the SimpleSample.cpp you should check it out for comparison |
| 33 | // The interesting part starts when the graph is loaded into the runtime |
| 34 | |
| 35 | std::vector<float> inputs; |
| 36 | float number1; |
| 37 | std::cout << "Please enter a number for the first iteration: " << std::endl; |
| 38 | std::cin >> number1; |
| 39 | float number2; |
| 40 | std::cout << "Please enter a number for the second iteration: " << std::endl; |
| 41 | std::cin >> number2; |
| 42 | |
| 43 | // Turn on logging to standard output |
| 44 | // This is useful in this sample so that users can learn more about what is going on |
| 45 | ConfigureLogging(true, false, LogSeverity::Warning); |
| 46 | |
| 47 | // Construct ArmNN network |
| 48 | NetworkId networkIdentifier; |
| 49 | INetworkPtr myNetwork = INetwork::Create(); |
| 50 | |
| 51 | float weightsData[] = {1.0f}; // Identity |
Cathal Corbett | 5b8093c | 2021-10-22 11:12:07 +0100 | [diff] [blame] | 52 | TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true); |
Jan Eilers | e38c418 | 2021-09-02 13:12:11 +0100 | [diff] [blame] | 53 | weightsInfo.SetConstant(); |
| 54 | ConstTensor weights(weightsInfo, weightsData); |
| 55 | |
| 56 | // Constant layer that now holds weights data for FullyConnected |
| 57 | IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights"); |
| 58 | |
| 59 | FullyConnectedDescriptor fullyConnectedDesc; |
| 60 | IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc, |
| 61 | "fully connected"); |
| 62 | IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0); |
| 63 | IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0); |
| 64 | |
| 65 | InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); |
| 66 | constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1)); |
| 67 | fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); |
| 68 | |
| 69 | // Create ArmNN runtime |
| 70 | IRuntime::CreationOptions options; // default options |
| 71 | IRuntimePtr run = IRuntime::Create(options); |
| 72 | |
| 73 | //Set the tensors in the network. |
| 74 | TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); |
| 75 | InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); |
| 76 | |
| 77 | TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32); |
| 78 | fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); |
| 79 | constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); |
| 80 | |
| 81 | // Optimise ArmNN network |
| 82 | IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec()); |
| 83 | if (!optNet) |
| 84 | { |
| 85 | // This shouldn't happen for this simple sample, with reference backend. |
| 86 | // But in general usage Optimize could fail if the hardware at runtime cannot |
| 87 | // support the model that has been provided. |
| 88 | std::cerr << "Error: Failed to optimise the input network." << std::endl; |
| 89 | return 1; |
| 90 | } |
| 91 | |
| 92 | // Load graph into runtime. |
| 93 | std::string errmsg; // To hold an eventual error message if loading the network fails |
| 94 | // Add network properties to enable async execution. The MemorySource::Undefined variables indicate |
| 95 | // that neither inputs nor outputs will be imported. Importing will be covered in another example. |
| 96 | armnn::INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined); |
| 97 | run->LoadNetwork(networkIdentifier, |
| 98 | std::move(optNet), |
| 99 | errmsg, |
| 100 | networkProperties); |
| 101 | |
| 102 | // Creates structures for inputs and outputs. A vector of float for each execution. |
| 103 | std::vector<std::vector<float>> inputData{{number1}, {number2}}; |
| 104 | std::vector<std::vector<float>> outputData; |
| 105 | outputData.resize(2, std::vector<float>(1)); |
| 106 | |
Cathal Corbett | 5b8093c | 2021-10-22 11:12:07 +0100 | [diff] [blame] | 107 | inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0); |
| 108 | inputTensorInfo.SetConstant(true); |
Jan Eilers | e38c418 | 2021-09-02 13:12:11 +0100 | [diff] [blame] | 109 | std::vector<InputTensors> inputTensors |
| 110 | { |
Cathal Corbett | 5b8093c | 2021-10-22 11:12:07 +0100 | [diff] [blame] | 111 | {{0, armnn::ConstTensor(inputTensorInfo, inputData[0].data())}}, |
| 112 | {{0, armnn::ConstTensor(inputTensorInfo, inputData[1].data())}} |
Jan Eilers | e38c418 | 2021-09-02 13:12:11 +0100 | [diff] [blame] | 113 | }; |
| 114 | std::vector<OutputTensors> outputTensors |
| 115 | { |
| 116 | {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}}, |
| 117 | {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}} |
| 118 | }; |
| 119 | |
| 120 | // Lambda function to execute the network. We use it as thread function. |
| 121 | auto execute = [&](unsigned int executionIndex) |
| 122 | { |
| 123 | auto memHandle = run->CreateWorkingMemHandle(networkIdentifier); |
| 124 | run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]); |
| 125 | }; |
| 126 | |
| 127 | // Prepare some threads and let each execute the network with a different input |
| 128 | std::vector<std::thread> threads; |
| 129 | for (unsigned int i = 0; i < inputTensors.size(); ++i) |
| 130 | { |
| 131 | threads.emplace_back(std::thread(execute, i)); |
| 132 | } |
| 133 | |
| 134 | // Wait for the threads to finish |
| 135 | for (std::thread& t : threads) |
| 136 | { |
| 137 | if(t.joinable()) |
| 138 | { |
| 139 | t.join(); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl; |
| 144 | return 0; |
| 145 | |
| 146 | } |