Blame - samples/AsyncExecutionSample.cpp - ml/armnn

blob: 6d2fe243dd18b254a0191afb50caec93d6e79ec5 [file] [log] [blame]

Jan Eilers	e38c418	2021-09-02 13:12:11 +0100	[diff] [blame^]	1	//
				2	// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5	#include <armnn/INetwork.hpp>
				6	#include <armnn/IRuntime.hpp>
				7	#include <armnn/Utils.hpp>
				8	#include <armnn/Descriptors.hpp>
				9
				10	#include <iostream>
				11	#include <thread>
				12
				13	/// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an asynchronous
				14	/// manner.
				15	///
				16	/// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not
				17	/// thread safe. Each workload has memory assigned to it which would be overwritten by each thread.
				18	/// Before we added support for this you had to load a network multiple times to execute it at the
				19	/// same time. Every time a network is loaded, it takes up memory on your device. Making the
				20	/// execution thread safe helps to reduce the memory footprint for concurrent executions significantly.
				21	/// This example shows you how to execute a model concurrently (multiple threads) while still only
				22	/// loading it once.
				23	///
				24	/// As in most of our simple samples, the network in this example will ask the user for a single input number for each
				25	/// execution of the network.
				26	/// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f
				27	/// to produce an output number that is the same as the input.
				28	int main()
				29	{
				30	using namespace armnn;
				31
				32	// The first part of this code is very similar to the SimpleSample.cpp you should check it out for comparison
				33	// The interesting part starts when the graph is loaded into the runtime
				34
				35	std::vector<float> inputs;
				36	float number1;
				37	std::cout << "Please enter a number for the first iteration: " << std::endl;
				38	std::cin >> number1;
				39	float number2;
				40	std::cout << "Please enter a number for the second iteration: " << std::endl;
				41	std::cin >> number2;
				42
				43	// Turn on logging to standard output
				44	// This is useful in this sample so that users can learn more about what is going on
				45	ConfigureLogging(true, false, LogSeverity::Warning);
				46
				47	// Construct ArmNN network
				48	NetworkId networkIdentifier;
				49	INetworkPtr myNetwork = INetwork::Create();
				50
				51	float weightsData[] = {1.0f}; // Identity
				52	TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
				53	weightsInfo.SetConstant();
				54	ConstTensor weights(weightsInfo, weightsData);
				55
				56	// Constant layer that now holds weights data for FullyConnected
				57	IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights");
				58
				59	FullyConnectedDescriptor fullyConnectedDesc;
				60	IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
				61	"fully connected");
				62	IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
				63	IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
				64
				65	InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
				66	constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
				67	fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
				68
				69	// Create ArmNN runtime
				70	IRuntime::CreationOptions options; // default options
				71	IRuntimePtr run = IRuntime::Create(options);
				72
				73	//Set the tensors in the network.
				74	TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
				75	InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
				76
				77	TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
				78	fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
				79	constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
				80
				81	// Optimise ArmNN network
				82	IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec());
				83	if (!optNet)
				84	{
				85	// This shouldn't happen for this simple sample, with reference backend.
				86	// But in general usage Optimize could fail if the hardware at runtime cannot
				87	// support the model that has been provided.
				88	std::cerr << "Error: Failed to optimise the input network." << std::endl;
				89	return 1;
				90	}
				91
				92	// Load graph into runtime.
				93	std::string errmsg; // To hold an eventual error message if loading the network fails
				94	// Add network properties to enable async execution. The MemorySource::Undefined variables indicate
				95	// that neither inputs nor outputs will be imported. Importing will be covered in another example.
				96	armnn::INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
				97	run->LoadNetwork(networkIdentifier,
				98	std::move(optNet),
				99	errmsg,
				100	networkProperties);
				101
				102	// Creates structures for inputs and outputs. A vector of float for each execution.
				103	std::vector<std::vector<float>> inputData{{number1}, {number2}};
				104	std::vector<std::vector<float>> outputData;
				105	outputData.resize(2, std::vector<float>(1));
				106
				107
				108	std::vector<InputTensors> inputTensors
				109	{
				110	{{0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputData[0].data())}},
				111	{{0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputData[1].data())}}
				112	};
				113	std::vector<OutputTensors> outputTensors
				114	{
				115	{{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}},
				116	{{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}}
				117	};
				118
				119	// Lambda function to execute the network. We use it as thread function.
				120	auto execute = [&](unsigned int executionIndex)
				121	{
				122	auto memHandle = run->CreateWorkingMemHandle(networkIdentifier);
				123	run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]);
				124	};
				125
				126	// Prepare some threads and let each execute the network with a different input
				127	std::vector<std::thread> threads;
				128	for (unsigned int i = 0; i < inputTensors.size(); ++i)
				129	{
				130	threads.emplace_back(std::thread(execute, i));
				131	}
				132
				133	// Wait for the threads to finish
				134	for (std::thread& t : threads)
				135	{
				136	if(t.joinable())
				137	{
				138	t.join();
				139	}
				140	}
				141
				142	std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl;
				143	return 0;
				144
				145	}