blob: a789aade017239d62d386ff78736aa20d96b7b5b [file] [log] [blame]
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Utils.hpp>

#include <iostream>
#include <string>
#include <thread>
#include <vector>

/// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an
/// asynchronous manner.
///
/// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not
///                  thread safe. Each workload has memory assigned to it which would be overwritten by each thread.
///                  Before support for asynchronous execution was added, you had to load a network multiple times to
///                  execute it at the same time. Every time a network is loaded, it takes up memory on your device.
///                  Making the execution thread safe helps to reduce the memory footprint for concurrent executions
///                  significantly. This example shows you how to execute a model concurrently (multiple threads)
///                  while still only loading it once.
///
/// As in most of our simple samples, the network in this example will ask the user for a single input number for each
/// execution of the network.
/// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f
/// to produce an output number that is the same as the input.
28int main()
29{
30 using namespace armnn;
31
32 // The first part of this code is very similar to the SimpleSample.cpp you should check it out for comparison
33 // The interesting part starts when the graph is loaded into the runtime
34
35 std::vector<float> inputs;
36 float number1;
37 std::cout << "Please enter a number for the first iteration: " << std::endl;
38 std::cin >> number1;
39 float number2;
40 std::cout << "Please enter a number for the second iteration: " << std::endl;
41 std::cin >> number2;
42
43 // Turn on logging to standard output
44 // This is useful in this sample so that users can learn more about what is going on
45 ConfigureLogging(true, false, LogSeverity::Warning);
46
47 // Construct ArmNN network
48 NetworkId networkIdentifier;
49 INetworkPtr myNetwork = INetwork::Create();
50
51 float weightsData[] = {1.0f}; // Identity
Cathal Corbett5b8093c2021-10-22 11:12:07 +010052 TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
Jan Eilerse38c4182021-09-02 13:12:11 +010053 weightsInfo.SetConstant();
54 ConstTensor weights(weightsInfo, weightsData);
55
56 // Constant layer that now holds weights data for FullyConnected
57 IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights");
58
59 FullyConnectedDescriptor fullyConnectedDesc;
60 IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
61 "fully connected");
62 IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
63 IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
64
65 InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
66 constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
67 fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
68
69 // Create ArmNN runtime
70 IRuntime::CreationOptions options; // default options
71 IRuntimePtr run = IRuntime::Create(options);
72
73 //Set the tensors in the network.
74 TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
75 InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
76
77 TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
78 fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
79 constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
80
81 // Optimise ArmNN network
82 IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec());
83 if (!optNet)
84 {
85 // This shouldn't happen for this simple sample, with reference backend.
86 // But in general usage Optimize could fail if the hardware at runtime cannot
87 // support the model that has been provided.
88 std::cerr << "Error: Failed to optimise the input network." << std::endl;
89 return 1;
90 }
91
92 // Load graph into runtime.
93 std::string errmsg; // To hold an eventual error message if loading the network fails
94 // Add network properties to enable async execution. The MemorySource::Undefined variables indicate
95 // that neither inputs nor outputs will be imported. Importing will be covered in another example.
96 armnn::INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
97 run->LoadNetwork(networkIdentifier,
98 std::move(optNet),
99 errmsg,
100 networkProperties);
101
102 // Creates structures for inputs and outputs. A vector of float for each execution.
103 std::vector<std::vector<float>> inputData{{number1}, {number2}};
104 std::vector<std::vector<float>> outputData;
105 outputData.resize(2, std::vector<float>(1));
106
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100107 inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
108 inputTensorInfo.SetConstant(true);
Jan Eilerse38c4182021-09-02 13:12:11 +0100109 std::vector<InputTensors> inputTensors
110 {
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100111 {{0, armnn::ConstTensor(inputTensorInfo, inputData[0].data())}},
112 {{0, armnn::ConstTensor(inputTensorInfo, inputData[1].data())}}
Jan Eilerse38c4182021-09-02 13:12:11 +0100113 };
114 std::vector<OutputTensors> outputTensors
115 {
116 {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}},
117 {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}}
118 };
119
120 // Lambda function to execute the network. We use it as thread function.
121 auto execute = [&](unsigned int executionIndex)
122 {
123 auto memHandle = run->CreateWorkingMemHandle(networkIdentifier);
124 run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]);
125 };
126
127 // Prepare some threads and let each execute the network with a different input
128 std::vector<std::thread> threads;
129 for (unsigned int i = 0; i < inputTensors.size(); ++i)
130 {
131 threads.emplace_back(std::thread(execute, i));
132 }
133
134 // Wait for the threads to finish
135 for (std::thread& t : threads)
136 {
137 if(t.joinable())
138 {
139 t.join();
140 }
141 }
142
143 std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl;
144 return 0;
145
146}