blob: 77901df44479bce38359d7ca524716adc3733707 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
59 runtime->LoadNetwork(netId, std::move(optNet));
60
61 // Creates structures for input & output.
62 std::vector<T> outputData(inputData.size());
63
64 InputTensors inputTensors
65 {
66 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67 };
68 OutputTensors outputTensors
69 {
70 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71 };
72
73 // Does the inference.
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75
76 // Checks the results.
77 return outputData == expectedOutputData;
78}
79
80inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010082 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010084
85 return ConstantUsageTest(backends,
86 commonTensorInfo,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90 );
91}
92
93inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94{
Derek Lambertif90c56d2020-01-10 17:14:08 +000095 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010096
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
99
100 commonTensorInfo.SetQuantizationScale(scale);
101 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100102 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100103
104 return ConstantUsageTest(backends,
105 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100109 );
110}
111
// Utility function to find the number of non-overlapping occurrences of a
// substring within a string.
//
// An empty substring is defined to occur zero times. (Without this guard,
// string::find("") matches at the current position and the search offset is
// advanced by substring.length() == 0, producing an infinite loop.)
int SubStringCounter(const std::string& string, const std::string& substring)
{
    if (substring.empty())
    {
        return 0;
    }
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found the substring
    while((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by substring length to avoid finding the same substring twice
        found += substring.length();
    }
    return count;
}
126
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000127template<DataType ArmnnIType, DataType ArmnnOType,
128 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000129void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000130 const std::map<int, std::vector<TInput>>& inputTensorData,
131 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000134{
135 // Create runtime in which test will run
136 IRuntime::CreationOptions options;
137 IRuntimePtr runtime(IRuntime::Create(options));
138
139 // optimize the network
140 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141
142 // Loads it into the runtime.
143 NetworkId netId;
144 runtime->LoadNetwork(netId, std::move(optNet));
145
146 InputTensors inputTensors;
147 inputTensors.reserve(inputTensorData.size());
148 for (auto&& it : inputTensorData)
149 {
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152 }
153 OutputTensors outputTensors;
154 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000155 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000156 for (auto&& it : expectedOutputData)
157 {
kevmay012b4d88e2019-01-24 14:05:09 +0000158 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
163 }
164
165 // Does the inference.
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167
168 // Checks the results.
169 for (auto&& it : expectedOutputData)
170 {
kevmay012b4d88e2019-01-24 14:05:09 +0000171 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100172 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000173 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100175 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 }
narpra01b9546cf2018-11-20 15:21:28 +0000178 }
179}
180
David Monahan4f1e8e42019-09-04 09:22:10 +0100181inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100182{
183 using namespace armnn;
184
185 // Create runtime in which test will run
186 IRuntime::CreationOptions options;
187 IRuntimePtr runtime(armnn::IRuntime::Create(options));
188
189 // build up the structure of the network
190 INetworkPtr net(INetwork::Create());
191
192 IConnectableLayer* input = net->AddInputLayer(0);
193
David Monahan3fb7e102019-08-20 11:25:29 +0100194 ActivationDescriptor descriptor;
195 descriptor.m_Function = ActivationFunction::Square;
196 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100197
198 IConnectableLayer* output = net->AddOutputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100202
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100204 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100205
206 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000207 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100208 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100209
210 // Loads it into the runtime.
211 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100212 std::string ignoredErrorMessage;
213 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100214 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan4f1e8e42019-09-04 09:22:10 +0100215 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100216
217 // Creates structures for input & output
218 std::vector<float> inputData
219 {
David Monahan3fb7e102019-08-20 11:25:29 +0100220 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100221 };
222
223 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100224 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100225
David Monahan3fb7e102019-08-20 11:25:29 +0100226 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100227
David Monahan4f1e8e42019-09-04 09:22:10 +0100228 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100229 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100230
231 InputTensors inputTensors
232 {
233 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
234 };
235 OutputTensors outputTensors
236 {
237 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
238 };
239
David Monahan4f1e8e42019-09-04 09:22:10 +0100240 runtime->GetProfiler(netId)->EnableProfiling(true);
241
242 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100243 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100244}
245
Ferran Balaguer83239f92019-09-19 11:49:25 +0100246inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100247{
248 using namespace armnn;
249
250 // Create runtime in which test will run
251 IRuntime::CreationOptions options;
252 IRuntimePtr runtime(armnn::IRuntime::Create(options));
253
254 // build up the structure of the network
255 INetworkPtr net(INetwork::Create());
256
257 IConnectableLayer* input = net->AddInputLayer(0);
258
David Monahan3fb7e102019-08-20 11:25:29 +0100259 ActivationDescriptor descriptor;
260 descriptor.m_Function = ActivationFunction::Square;
261 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100262
263 IConnectableLayer* output = net->AddOutputLayer(0);
264
David Monahan3fb7e102019-08-20 11:25:29 +0100265 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
266 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100267
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100268 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100269 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100270
271 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000272 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100273 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100274
275 // Loads it into the runtime.
276 NetworkId netId;
277 std::string ignoredErrorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100278 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100279 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100280 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
281
282 // Creates structures for input & output
283 std::vector<float> inputData
284 {
285 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
286 };
287
288 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100289 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100290
291 std::vector<float> outputData(5);
292
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100293 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100294 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100295
296 InputTensors inputTensors
297 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100298 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100299 };
300 OutputTensors outputTensors
301 {
302 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
303 };
304
Ferran Balaguer83239f92019-09-19 11:49:25 +0100305 // Do the inference and expect it to fail with a ExportMemoryException
306 if (backends[0] == Compute::CpuAcc)
307 {
308 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100309 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100310 }
311 else
312 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100313 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100314 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100315}
316
317inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
318{
319 using namespace armnn;
320
321 // Create runtime in which test will run
322 IRuntime::CreationOptions options;
323 IRuntimePtr runtime(armnn::IRuntime::Create(options));
324
325 // build up the structure of the network
326 INetworkPtr net(INetwork::Create());
327
328 IConnectableLayer* input = net->AddInputLayer(0);
329
David Monahan3fb7e102019-08-20 11:25:29 +0100330 ActivationDescriptor descriptor;
331 descriptor.m_Function = ActivationFunction::Square;
332 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100333
334 IConnectableLayer* output = net->AddOutputLayer(0);
335
David Monahan3fb7e102019-08-20 11:25:29 +0100336 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
337 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100338
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100339 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100340 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100341
342 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000343 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100344 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100345
346 // Loads it into the runtime.
347 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100348 std::string ignoredErrorMessage;
349 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100350 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100351 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100352
353 // Creates structures for input & output
354 std::vector<float> inputData
355 {
356 1.0f, 2.0f, 3.0f, 4.0f
357 };
358
359 std::vector<float> outputData(4);
360
James Conroy57d10b72019-10-25 09:44:14 +0100361 std::vector<float> expectedOutput
362 {
363 1.0f, 4.0f, 9.0f, 16.0f
364 };
365
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100366 InputTensors inputTensors
367 {
368 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
369 };
370 OutputTensors outputTensors
371 {
372 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
373 };
374
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100375 runtime->GetProfiler(netId)->EnableProfiling(true);
376
377 // Do the inference
378 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
379
380 // Retrieve the Profiler.Print() output to get the workload execution
381 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
382 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000383 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100384 std::string dump = ss.str();
385
David Monahan3fb7e102019-08-20 11:25:29 +0100386 // Contains ActivationWorkload
387 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100388 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100389
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100390 // Contains SyncMemGeneric
391 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100392 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100393
Ferran Balaguer83239f92019-09-19 11:49:25 +0100394 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100395 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100397
398 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100399 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100400}
401
Ferran Balaguer83239f92019-09-19 11:49:25 +0100402inline void ImportOnlyWorkload(std::vector<BackendId> backends)
403{
404 using namespace armnn;
405
406 IRuntime::CreationOptions options;
407 IRuntimePtr runtime(IRuntime::Create(options));
408
409 // Builds up the structure of the network.
410 INetworkPtr net(INetwork::Create());
411
412 IConnectableLayer* input = net->AddInputLayer(0);
413
414 ActivationDescriptor descriptor;
415 descriptor.m_Function = ActivationFunction::Square;
416 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
417
418 IConnectableLayer* output = net->AddOutputLayer(0);
419
420 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
421 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
422
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100423 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100424 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
425
426 // optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000427 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100428
Sadik Armagan1625efc2021-06-10 18:24:34 +0100429 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100430 // Load it into the runtime. It should pass.
431 NetworkId netId;
432 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100433
434 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
435
Sadik Armagan1625efc2021-06-10 18:24:34 +0100436 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100437 == Status::Success);
438
Sadik Armagan1625efc2021-06-10 18:24:34 +0100439 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100440 // Creates structures for input & output
441 std::vector<float> inputData
442 {
443 1.0f, 2.0f, 3.0f, 4.0f
444 };
445
446 std::vector<float> outputData(4);
447
448 std::vector<float> expectedOutput
449 {
450 1.0f, 4.0f, 9.0f, 16.0f
451 };
452
David Monahan646bc8a2022-01-31 14:29:14 +0000453 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100454
Ferran Balaguer83239f92019-09-19 11:49:25 +0100455 InputTensors inputTensors
456 {
457 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
458 };
459 OutputTensors outputTensors
460 {
461 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
462 };
463
Sadik Armagan1625efc2021-06-10 18:24:34 +0100464 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100465 runtime->GetProfiler(netId)->EnableProfiling(true);
466
Sadik Armagan1625efc2021-06-10 18:24:34 +0100467 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100468 // Do the inference
469 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
470
Sadik Armagan1625efc2021-06-10 18:24:34 +0100471 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100472 // Retrieve the Profiler.Print() output to get the workload execution
473 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
474 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000475 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100476 std::string dump = ss.str();
477
478 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100479 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100480 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482
483 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100484 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100485 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100486 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100487
488 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100489 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100490}
491
492inline void ExportOnlyWorkload(std::vector<BackendId> backends)
493{
494 using namespace armnn;
495
496 IRuntime::CreationOptions options;
497 IRuntimePtr runtime(IRuntime::Create(options));
498
499 // Builds up the structure of the network.
500 INetworkPtr net(INetwork::Create());
501
502 IConnectableLayer* input = net->AddInputLayer(0);
503
504 ActivationDescriptor descriptor;
505 descriptor.m_Function = ActivationFunction::Square;
506 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
507
508 IConnectableLayer* output = net->AddOutputLayer(0);
509
510 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
511 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
512
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100513 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100514 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
515
516 // optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000517 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100518
Sadik Armagan1625efc2021-06-10 18:24:34 +0100519 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100520 // Load it into the runtime. It should pass.
521 NetworkId netId;
522 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100523 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100524 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100525 == Status::Success);
526
Sadik Armagan1625efc2021-06-10 18:24:34 +0100527 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100528 // Creates structures for input & output
529 std::vector<float> inputData
530 {
531 1.0f, 2.0f, 3.0f, 4.0f
532 };
533
534 std::vector<float> outputData(4);
535
536 std::vector<float> expectedOutput
537 {
538 1.0f, 4.0f, 9.0f, 16.0f
539 };
540
David Monahan646bc8a2022-01-31 14:29:14 +0000541 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100542
Ferran Balaguer83239f92019-09-19 11:49:25 +0100543 InputTensors inputTensors
544 {
545 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
546 };
547 OutputTensors outputTensors
548 {
549 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
550 };
551
Sadik Armagan1625efc2021-06-10 18:24:34 +0100552 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100553 runtime->GetProfiler(netId)->EnableProfiling(true);
554
Sadik Armagan1625efc2021-06-10 18:24:34 +0100555 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100556 // Do the inference
557 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
558
Sadik Armagan1625efc2021-06-10 18:24:34 +0100559 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100560 // Retrieve the Profiler.Print() output to get the workload execution
561 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
562 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000563 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100564 std::string dump = ss.str();
565
566 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100568 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100569 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100570
571 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100572 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100573 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100574 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100575
576 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100577 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100578}
579
580inline void ImportAndExportWorkload(std::vector<BackendId> backends)
581{
582 using namespace armnn;
583
584 IRuntime::CreationOptions options;
585 IRuntimePtr runtime(IRuntime::Create(options));
586
587 // Builds up the structure of the network.
588 INetworkPtr net(INetwork::Create());
589
590 IConnectableLayer* input = net->AddInputLayer(0);
591
592 ActivationDescriptor descriptor;
593 descriptor.m_Function = ActivationFunction::Square;
594 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
595
596 IConnectableLayer* output = net->AddOutputLayer(0);
597
598 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
599 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
600
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100601 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100602 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
603
James Conroya0f8b152022-06-21 11:31:47 +0000604 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100605
Sadik Armagan1625efc2021-06-10 18:24:34 +0100606 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100607 // Load it into the runtime. It should pass.
608 NetworkId netId;
609 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100610
611 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
612
Sadik Armagan1625efc2021-06-10 18:24:34 +0100613 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100614 == Status::Success);
615
Sadik Armagan1625efc2021-06-10 18:24:34 +0100616 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100617 // Creates structures for input & output
618 std::vector<float> inputData
619 {
620 1.0f, 2.0f, 3.0f, 4.0f
621 };
622
623 std::vector<float> outputData(4);
624
625 std::vector<float> expectedOutput
626 {
627 1.0f, 4.0f, 9.0f, 16.0f
628 };
629
David Monahan646bc8a2022-01-31 14:29:14 +0000630 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100631
Ferran Balaguer83239f92019-09-19 11:49:25 +0100632 InputTensors inputTensors
633 {
634 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
635 };
636 OutputTensors outputTensors
637 {
638 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
639 };
640
Sadik Armagan1625efc2021-06-10 18:24:34 +0100641 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100642 runtime->GetProfiler(netId)->EnableProfiling(true);
643
Sadik Armagan1625efc2021-06-10 18:24:34 +0100644 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100645 // Do the inference
646 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
647
Sadik Armagan1625efc2021-06-10 18:24:34 +0100648 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100649 // Retrieve the Profiler.Print() output to get the workload execution
650 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
651 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000652 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100653 std::string dump = ss.str();
654
655 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100656 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100657 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100658 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100659
660 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100661 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100662 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100663 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100664
665 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100666 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100667}
668
669inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
670{
671 using namespace armnn;
672
673 // Create runtime in which test will run
674 IRuntime::CreationOptions options;
675 IRuntimePtr runtime(armnn::IRuntime::Create(options));
676
677 // build up the structure of the network
678 INetworkPtr net(INetwork::Create());
679
680 IConnectableLayer* input = net->AddInputLayer(0);
681
682 ActivationDescriptor descriptor;
683 descriptor.m_Function = ActivationFunction::Square;
684 IConnectableLayer* activation = net->AddActivationLayer(descriptor);
685
686 IConnectableLayer* output0 = net->AddOutputLayer(0);
687 IConnectableLayer* output1 = net->AddOutputLayer(1);
688
689 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
690 activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
691 activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
692
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100693 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100694 activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
695
696 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000697 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100698
699 // Loads it into the runtime.
700 NetworkId netId;
701 std::string ignoredErrorMessage;
702 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100703 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100704 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
705
706 // Creates structures for input & output
707 std::vector<float> inputData
708 {
709 1.0f, 2.0f, 3.0f, 4.0f
710 };
711
712 std::vector<float> outputData0(4);
713 std::vector<float> outputData1(4);
714
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100715 std::vector<float> expectedOutput
716 {
717 1.0f, 4.0f, 9.0f, 16.0f
718 };
719
Ferran Balaguer83239f92019-09-19 11:49:25 +0100720 InputTensors inputTensors
721 {
722 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
723 };
724 OutputTensors outputTensors
725 {
726 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
727 {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
728 };
729
730 // The result of the inference is not important, just the fact that there
731 // should not be CopyMemGeneric workloads.
732 runtime->GetProfiler(netId)->EnableProfiling(true);
733
734 // Do the inference
735 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
736
737 // Retrieve the Profiler.Print() output to get the workload execution
738 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
739 std::stringstream ss;
740 profilerManager.GetProfiler()->Print(ss);
741 std::string dump = ss.str();
742
743 std::size_t found = std::string::npos;
744
745 if (backends[0] == Compute::CpuRef)
746 {
747 found = dump.find("RefActivationWorkload");
748 }
749 else if (backends[0] == Compute::CpuAcc)
750 {
751 found = dump.find("NeonActivationWorkload");
752 }
753 else if (backends[0] == Compute::GpuAcc)
754 {
755 found = dump.find("ClActivationWorkload");
756 }
757
Sadik Armagan1625efc2021-06-10 18:24:34 +0100758 CHECK(found != std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100759 // No contains SyncMemGeneric
760 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100761 CHECK(found == std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100762 // Contains CopyMemGeneric
763 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100764 CHECK(found != std::string::npos);
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100765
766 // Check that the outputs are correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100767 CHECK(std::equal(outputData0.begin(), outputData0.end(),
768 expectedOutput.begin(), expectedOutput.end()));
769 CHECK(std::equal(outputData1.begin(), outputData1.end(),
770 expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100771}
772
David Monahan0a99a142020-03-13 07:52:54 +0000773inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
774{
775 using namespace armnn;
776
777 // Create runtime in which test will run
778 IRuntime::CreationOptions options;
779 IRuntimePtr runtime(armnn::IRuntime::Create(options));
780
781 // build up the structure of the network
782 INetworkPtr net(INetwork::Create());
783
784 IConnectableLayer* input = net->AddInputLayer(0);
785
786 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
787 // dim of the output to make it too small to hold the specified slice.
788 StridedSliceDescriptor descriptor;
789 descriptor.m_Begin = {0, 0};
790 descriptor.m_End = {2, 3};
791 descriptor.m_Stride = {1, 1};
792 descriptor.m_BeginMask = 0;
793 descriptor.m_EndMask = 0;
794 descriptor.m_ShrinkAxisMask = 1;
795 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
796
797 IConnectableLayer* output0 = net->AddOutputLayer(0);
798
799 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
800 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
801
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100802 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000803 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
804
805 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100806 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000807}
808
David Monahan646bc8a2022-01-31 14:29:14 +0000809inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
810{
811 /**
812 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
813 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
814 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
815 * In this case all inputs and outputs should be imported
816 */
817 using namespace armnn;
818 IRuntime::CreationOptions options;
819 IRuntimePtr runtime(IRuntime::Create(options));
820
821 // Builds up the structure of the network.
822 INetworkPtr net(INetwork::Create());
823 IConnectableLayer* input = net->AddInputLayer(0);
824 ActivationDescriptor descriptor;
825 descriptor.m_Function = ActivationFunction::Square;
826 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
827 IConnectableLayer* output = net->AddOutputLayer(0);
828 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
829 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
830 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
831 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
832 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
833 INFO("Load Network");
834
835 // Load it into the runtime. It should pass.
836 NetworkId netId;
837 std::string ignoredErrorMessage;
838 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
839 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
840 == Status::Success);
841 INFO("Generate Data");
842
843 // Creates structures for input & output
844 std::vector<float> inputData
845 {
846 1.0f, 2.0f, 3.0f, 4.0f
847 };
848 std::vector<float> outputData(4);
849 std::vector<float> expectedOutput
850 {
851 1.0f, 4.0f, 9.0f, 16.0f
852 };
853
854 // Check our input and output pointers are actually aligned
855 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
856 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
857 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
858
859 INFO("Create Inference");
860 InputTensors inputTensors
861 {
862 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
863 };
864 OutputTensors outputTensors
865 {
866 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
867 };
868
869 runtime->GetProfiler(netId)->EnableProfiling(true);
870 std::vector<ImportedInputId> importedInputIds =
871 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
872 std::vector<ImportedOutputId> importedOutputIds =
873 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
874 // Do the inference and force the import as the memory is aligned.
875 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
876
877 // Retrieve the Profiler.Print() output to get the workload execution
878 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
879 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000880 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000881 std::string dump = ss.str();
882
883 if (backends[0] == Compute::CpuAcc)
884 {
885 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
886 // reconfigure is implemented
887 int count = SubStringCounter(dump, "SyncMemGeneric");
888 CHECK(count == 0);
889 // Should be 2 CopyMemGeneric workloads
890 count = SubStringCounter(dump, "CopyMemGeneric");
891 CHECK(count == 2);
892 }
893 else
894 {
895 // Check there is a SyncMemGeneric workload as we exported
896 int count = SubStringCounter(dump, "SyncMemGeneric");
897 CHECK(count == 1);
898 // Shouldn't be any CopyMemGeneric workloads
899 count = SubStringCounter(dump, "CopyMemGeneric");
900 CHECK(count == 0);
901 }
902 // Check the output is correct
903 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
904}
905
906inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
907{
908 /**
909 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
910 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
911 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
912 * In this case all only the output should be imported
913 */
914 using namespace armnn;
915
916 IRuntime::CreationOptions options;
917 IRuntimePtr runtime(IRuntime::Create(options));
918
919 // Builds up the structure of the network.
920 INetworkPtr net(INetwork::Create());
921 IConnectableLayer* input = net->AddInputLayer(0);
922
923 ActivationDescriptor descriptor;
924 descriptor.m_Function = ActivationFunction::Square;
925 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
926
927 IConnectableLayer* output = net->AddOutputLayer(0);
928
929 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
930 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
931 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
932 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
933
934 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
935 INFO("Load Network");
936 // Load it into the runtime. It should pass.
937 NetworkId netId;
938 std::string ignoredErrorMessage;
939 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
940 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
941 == Status::Success);
942 INFO("Generate Data");
943
944 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
945 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
946 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
947
948 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
949
950 // Check if our pointer is truly misaligned
951 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
952 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
953
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000954 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +0000955 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000956 1.0f, 2.0f, 3.0f, 4.0f
957 };
958
959 std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +0000960
961 std::vector<float> outputData(4);
962 // Check our output buffer is aligned
963 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
964
965 std::vector<float> expectedOutput
966 {
967 1.0f, 4.0f, 9.0f, 16.0f
968 };
969
970 INFO("Create Inference");
971 InputTensors inputTensors
972 {
973 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
974 };
975 OutputTensors outputTensors
976 {
977 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
978 };
979 runtime->GetProfiler(netId)->EnableProfiling(true);
980 std::vector<ImportedInputId> importedInputIds =
981 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
982 std::vector<ImportedOutputId> importedOutputIds =
983 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
984
985 // Do the inference and force the import as the memory is misaligned.
986 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
987
988 // Retrieve the Profiler.Print() output to get the workload execution
989 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
990 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000991 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000992 std::string dump = ss.str();
993
994 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
995 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
996 // for imports/copies. Only that the output is correct.
997 if (backends[0] != Compute::GpuAcc)
998 {
999 if (backends[0] == Compute::CpuAcc)
1000 {
1001 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1002 // reconfigure is implemented
1003 // We should get 0 SyncMemGeneric for the Output
1004 int count = SubStringCounter(dump, "SyncMemGeneric");
1005 CHECK(count == 0);
1006 // Should be 2 CopyMemGeneric as we copied the input
1007 count = SubStringCounter(dump, "CopyMemGeneric");
1008 CHECK(count == 2);
1009 }
1010 else
1011 {
1012 // We should get 1 SyncMemGeneric for the Output
1013 int count = SubStringCounter(dump, "SyncMemGeneric");
1014 CHECK(count == 1);
1015 // Should only be 1 CopyMemGeneric as we copied the input
1016 count = SubStringCounter(dump, "CopyMemGeneric");
1017 CHECK(count == 1);
1018 }
1019 }
1020 // Check the output is correct
1021 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1022 std::free(memPtr);
1023}
1024
1025inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1026{
1027 /**
1028 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1029 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1030 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1031 * In this case all only the input should be imported
1032 */
1033 using namespace armnn;
1034
1035 IRuntime::CreationOptions options;
1036 IRuntimePtr runtime(IRuntime::Create(options));
1037
1038 // Builds up the structure of the network.
1039 INetworkPtr net(INetwork::Create());
1040 IConnectableLayer* input = net->AddInputLayer(0);
1041
1042 ActivationDescriptor descriptor;
1043 descriptor.m_Function = ActivationFunction::Square;
1044 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1045
1046 IConnectableLayer* output = net->AddOutputLayer(0);
1047
1048 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1049 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1050 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1051 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1052
1053 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1054 INFO("Load Network");
1055 // Load it into the runtime. It should pass.
1056 NetworkId netId;
1057 std::string ignoredErrorMessage;
1058 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1059 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1060 == Status::Success);
1061 INFO("Generate Data");
1062
1063 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1064 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1065 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1066
1067 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1068
1069 // Check if our pointer is truly misaligned
1070 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1071 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1072
1073 // Creates structures for input & output
1074 std::vector<float> inputData
1075 {
1076 1.0f, 2.0f, 3.0f, 4.0f
1077 };
1078
1079 // Check our input buffer is aligned
1080 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1081 std::vector<float> expectedOutput
1082 {
1083 1.0f, 4.0f, 9.0f, 16.0f
1084 };
1085
1086 INFO("Create Inference");
1087 InputTensors inputTensors
1088 {
1089 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1090 };
1091 OutputTensors outputTensors
1092 {
1093 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1094 };
1095 runtime->GetProfiler(netId)->EnableProfiling(true);
1096 std::vector<ImportedInputId> importedInputIds =
1097 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1098 std::vector<ImportedOutputId> importedOutputIds =
1099 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1100
1101 // Do the inference and force the import as the memory is misaligned.
1102 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1103
1104 // Retrieve the Profiler.Print() output to get the workload execution
1105 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1106 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001107 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001108 std::string dump = ss.str();
1109
1110 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1111 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1112 // for imports/copies. Only that the output is correct.
1113 if (backends[0] != Compute::GpuAcc)
1114 {
1115 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1116 int count = SubStringCounter(dump, "SyncMemGeneric");
1117 CHECK(count == 0);
1118 // Should only be 1 CopyMemGeneric as we copied the input
1119 count = SubStringCounter(dump, "CopyMemGeneric");
1120 if (backends[0] == Compute::CpuAcc)
1121 {
1122 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1123 // reconfigure is implemented
1124 CHECK(count == 2);
1125 }
1126 else
1127 {
1128 CHECK(count == 1);
1129 }
1130 // Check the output is correct
1131 }
1132 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001133 std::vector<float> outputData(expectedOutput.size(), 0);
1134 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001135 for (auto outputValue : expectedOutput)
1136 {
David Monahaneef6b762022-02-10 16:01:58 +00001137 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001138 ++index;
1139 }
1140 std::free(memPtr);
1141}
1142
1143inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1144{
1145 /**
1146 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1147 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1148 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1149 * In this case all inputs and outputs should be copied
1150 */
1151 using namespace armnn;
1152
1153 IRuntime::CreationOptions options;
1154 IRuntimePtr runtime(IRuntime::Create(options));
1155
1156 // Builds up the structure of the network.
1157 INetworkPtr net(INetwork::Create());
1158 IConnectableLayer* input = net->AddInputLayer(0);
1159
1160 ActivationDescriptor descriptor;
1161 descriptor.m_Function = ActivationFunction::Square;
1162 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1163
1164 IConnectableLayer* output = net->AddOutputLayer(0);
1165
1166 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1167 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1168 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1169 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1170
1171 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1172 INFO("Load Network");
1173 // Load it into the runtime. It should pass.
1174 NetworkId netId;
1175 std::string ignoredErrorMessage;
1176 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1177 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1178 == Status::Success);
1179 INFO("Generate Data");
1180
1181 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1182 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1183 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1184 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1185
1186 // Check if our pointer is truly misaligned
1187 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1188 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001189 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001190 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001191 1.0f, 2.0f, 3.0f, 4.0f
1192 };
1193 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001194
1195 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1196 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1197
1198 // Check if our pointer is truly misaligned
1199 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1200
1201 std::vector<float> expectedOutput
1202 {
1203 1.0f, 4.0f, 9.0f, 16.0f
1204 };
1205
1206 INFO("Create Inference");
1207 InputTensors inputTensors
1208 {
1209 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1210 };
1211 OutputTensors outputTensors
1212 {
1213 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1214 };
1215 runtime->GetProfiler(netId)->EnableProfiling(true);
1216 std::vector<ImportedInputId> importedInputIds =
1217 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1218 std::vector<ImportedOutputId> importedOutputIds =
1219 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1220
1221 // Do the inference and force the import as the memory is misaligned.
1222 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1223
1224 // Retrieve the Profiler.Print() output to get the workload execution
1225 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1226 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001227 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001228 std::string dump = ss.str();
1229
1230 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1231 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1232 // for imports/copies. Only that the output is correct.
1233 if (backends[0] != Compute::GpuAcc)
1234 {
1235 // We can only copy so there should be no SyncMemGeneric
1236 int count = SubStringCounter(dump, "SyncMemGeneric");
1237 CHECK(count == 0);
1238 // Should only be CopyMemGeneric workloads as we copied all buffers
1239 count = SubStringCounter(dump, "CopyMemGeneric");
1240 CHECK(count == 2);
1241 }
1242 // Check the output is correct
1243 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001244 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001245 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1246 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001247 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001248 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001249 ++index;
1250 }
1251 std::free(inputMemPtr);
1252 std::free(outputMemPtr);
1253}
1254
David Monahan16829712022-02-03 17:04:59 +00001255inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1256{
1257 /**
1258 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1259 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1260 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1261 * In this we create some aligned buffers, import them into a network and validate the output and number of
1262 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1263 * back to copying correctly.
1264 */
1265 using namespace armnn;
1266
1267 IRuntime::CreationOptions options;
1268 IRuntimePtr runtime(IRuntime::Create(options));
1269
1270 // Builds up the structure of the network.
1271 INetworkPtr net(INetwork::Create());
1272 IConnectableLayer* input = net->AddInputLayer(0);
1273
1274 ActivationDescriptor descriptor;
1275 descriptor.m_Function = ActivationFunction::Square;
1276 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1277
1278 IConnectableLayer* output = net->AddOutputLayer(0);
1279
1280 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1281 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1282 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1283 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1284
1285 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1286 INFO("Load Network");
1287 // Load it into the runtime. It should pass.
1288 NetworkId netId;
1289 std::string ignoredErrorMessage;
1290 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1291 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1292 == Status::Success);
1293 INFO("Generate Data");
1294
1295 // Creates structures for input & output
1296 std::vector<float> inputData
1297 {
1298 1.0f, 2.0f, 3.0f, 4.0f
1299 };
1300 std::vector<float> outputData(4);
1301 std::vector<float> expectedOutput
1302 {
1303 1.0f, 4.0f, 9.0f, 16.0f
1304 };
1305
1306 // Check our input and output pointers are actually aligned
1307 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1308 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1309 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1310
1311 INFO("Create Inference");
1312 InputTensors inputTensors
1313 {
1314 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1315 };
1316 OutputTensors outputTensors
1317 {
1318 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1319 };
1320
1321 runtime->GetProfiler(netId)->EnableProfiling(true);
1322 std::vector<ImportedInputId> importedInputIds =
1323 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1324 std::vector<ImportedOutputId> importedOutputIds =
1325 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1326 // Do the inference and force the import as the memory is aligned.
1327 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1328
1329 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1330 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1331 std::stringstream ss;
1332 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1333 std::string dump = ss.str();
1334
1335 if (backends[0] == Compute::CpuAcc)
1336 {
1337 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1338 // reconfigure is implemented
1339 int count = SubStringCounter(dump, "SyncMemGeneric");
1340 CHECK(count == 0);
1341 // Should be 2 CopyMemGeneric workloads
1342 count = SubStringCounter(dump, "CopyMemGeneric");
1343 CHECK(count >= 1);
1344 }
1345 else
1346 {
1347 // Check there is at least 1 SyncMemGeneric workload as we exported
1348 int count = SubStringCounter(dump, "SyncMemGeneric");
1349 CHECK(count >= 1);
1350 // Shouldn't be any CopyMemGeneric workloads
1351 count = SubStringCounter(dump, "CopyMemGeneric");
1352 CHECK(count == 0);
1353 }
1354 // Check the output is correct
1355 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1356
1357 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1358 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1359 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1360 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1361
1362 // Check if our pointer is truly misaligned
1363 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001364
1365 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001366 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001367 2.0f, 3.0f, 4.0f, 5.0f
1368 };
1369
1370 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001371
1372 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1373 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1374
1375 // Check if our pointer is truly misaligned
1376 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1377
1378 std::vector<float> expectedMisalignedOutput
1379 {
1380 4.0f, 9.0f, 16.0f, 25.0f
1381 };
1382
1383 INFO("Create Second Inference");
1384 InputTensors inputTensorsMisaligned
1385 {
1386 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1387 };
1388 OutputTensors outputTensorsMisaligned
1389 {
1390 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1391 };
1392 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1393 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1394
1395 // Do the inference and force the import as the memory is misaligned.
1396 runtime->EnqueueWorkload(netId,
1397 inputTensorsMisaligned,
1398 outputTensorsMisaligned,
1399 importedInputIds,
1400 importedOutputIds);
1401
1402 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1403 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1404 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1405 dump = ss.str();
1406
1407 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1408 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1409 // for imports/copies. Only that the output is correct.
1410 if (backends[0] != Compute::GpuAcc)
1411 {
1412 // The SyncMemGeneric will still be in the profiling log from the first inference
1413 int count = SubStringCounter(dump, "SyncMemGeneric");
1414 CHECK(count >= 1);
1415 // We should now see CopyMemGeneric workloads as we copied all buffers
1416 count = SubStringCounter(dump, "CopyMemGeneric");
1417 CHECK(count >= 1);
1418 }
1419 // Check the output is correct
1420 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001421 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001422 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001423 for (auto outputValue : expectedMisalignedOutput)
1424 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001425 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001426 ++index;
1427 }
1428 // Clean up to avoid interfering with other tests
1429 runtime->UnloadNetwork(netId);
1430 std::free(inputMemPtr);
1431 std::free(outputMemPtr);
1432}
1433
1434
1435inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1436{
1437 /**
1438 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1439 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1440 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1441 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1442 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1443 * to importing correctly.
1444 */
1445 using namespace armnn;
1446
1447 IRuntime::CreationOptions options;
1448 IRuntimePtr runtime(IRuntime::Create(options));
1449
1450 // Builds up the structure of the network.
1451 INetworkPtr net(INetwork::Create());
1452 IConnectableLayer* input = net->AddInputLayer(0);
1453
1454 ActivationDescriptor descriptor;
1455 descriptor.m_Function = ActivationFunction::Square;
1456 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1457
1458 IConnectableLayer* output = net->AddOutputLayer(0);
1459
1460 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1461 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1462 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1463 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1464
1465 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1466 INFO("Load Network");
1467 // Load it into the runtime. It should pass.
1468 NetworkId netId;
1469 std::string ignoredErrorMessage;
1470 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1471 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1472 == Status::Success);
1473 INFO("Generate Data");
1474
1475 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1476 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1477 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1478 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1479
1480 // Check if our pointer is truly misaligned
1481 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1482 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001483 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001484 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001485 2.0f, 3.0f, 4.0f, 5.0f
1486 };
1487 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001488
1489 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1490 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1491
1492 // Check if our pointer is truly misaligned
1493 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1494
1495 std::vector<float> expectedMisalignedOutput
1496 {
1497 4.0f, 9.0f, 16.0f, 25.0f
1498 };
1499
1500 INFO("Create Second Inference");
1501 InputTensors inputTensorsMisaligned
1502 {
1503 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1504 };
1505 OutputTensors outputTensorsMisaligned
1506 {
1507 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1508 };
1509 runtime->GetProfiler(netId)->EnableProfiling(true);
1510 std::vector<ImportedInputId> importedInputIds =
1511 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1512 std::vector<ImportedOutputId> importedOutputIds =
1513 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1514
1515 // Do the inference and force the import as the memory is misaligned.
1516 runtime->EnqueueWorkload(netId,
1517 inputTensorsMisaligned,
1518 outputTensorsMisaligned,
1519 importedInputIds,
1520 importedOutputIds);
1521
1522 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1523 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1524 std::stringstream ss;
1525 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1526 std::string dump = ss.str();
1527
1528 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1529 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1530 // for imports/copies. Only that the output is correct.
1531 if (backends[0] != Compute::GpuAcc)
1532 {
1533 // We can only copy so there should be no SyncMemGeneric
1534 int count = SubStringCounter(dump, "SyncMemGeneric");
1535 CHECK(count == 0);
1536 // Should only be CopyMemGeneric workloads as we copied all buffers
1537 count = SubStringCounter(dump, "CopyMemGeneric");
1538 CHECK(count >= 1);
1539 }
1540 // Check the output is correct
1541 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001542 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1543 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001544 for (auto outputValue : expectedMisalignedOutput)
1545 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001546 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001547 ++index;
1548 }
1549 std::free(inputMemPtr);
1550 std::free(outputMemPtr);
1551
1552 // Creates structures for input & output
1553 std::vector<float> inputData
1554 {
1555 1.0f, 2.0f, 3.0f, 4.0f
1556 };
1557 std::vector<float> outputData(4);
1558 std::vector<float> expectedOutput
1559 {
1560 1.0f, 4.0f, 9.0f, 16.0f
1561 };
1562
1563 // Check our input and output pointers are actually aligned
1564 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1565 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1566
1567 INFO("Create Inference");
1568 InputTensors inputTensors
1569 {
1570 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1571 };
1572 OutputTensors outputTensors
1573 {
1574 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1575 };
1576
1577 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1578 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1579 // Do the inference and force the import as the memory is aligned.
1580 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1581
1582 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1583 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1584 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1585 dump = ss.str();
1586
1587 if (backends[0] == Compute::CpuAcc)
1588 {
1589 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1590 // reconfigure is implemented
1591 int count = SubStringCounter(dump, "SyncMemGeneric");
1592 CHECK(count == 0);
1593 // Should be 2 CopyMemGeneric workloads
1594 count = SubStringCounter(dump, "CopyMemGeneric");
1595 CHECK(count >= 1);
1596 }
1597 else
1598 {
1599 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1600 // SyncMemGeneric Workload when we previously didn't
1601 int count = SubStringCounter(dump, "SyncMemGeneric");
1602 CHECK(count >= 1);
1603 // Should still be some CopyMemGeneric Workloads from the last inference
1604 count = SubStringCounter(dump, "CopyMemGeneric");
1605 CHECK(count >= 1);
1606 }
1607 // Check the output is correct
1608 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1609 // Clean up to avoid interfering with other tests
1610 runtime->UnloadNetwork(netId);
1611}
1612
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001613} // anonymous namespace