blob: 44ae2beb768ab84427c06ad417198a4e4988de25 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
59 runtime->LoadNetwork(netId, std::move(optNet));
60
61 // Creates structures for input & output.
62 std::vector<T> outputData(inputData.size());
63
64 InputTensors inputTensors
65 {
66 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67 };
68 OutputTensors outputTensors
69 {
70 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71 };
72
73 // Does the inference.
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75
76 // Checks the results.
77 return outputData == expectedOutputData;
78}
79
80inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010082 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010084
85 return ConstantUsageTest(backends,
86 commonTensorInfo,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90 );
91}
92
93inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94{
Derek Lambertif90c56d2020-01-10 17:14:08 +000095 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010096
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
99
100 commonTensorInfo.SetQuantizationScale(scale);
101 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100102 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100103
104 return ConstantUsageTest(backends,
105 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100109 );
110}
111
// Utility function to find the number of non-overlapping instances of a
// substring within a string.
//
// Fixes over the previous version:
//  - Parameters are taken by const reference: the old (std::string&,
//    std::string&&) signature rejected const strings and named substrings
//    for no benefit. All existing call sites (lvalue haystack, literal
//    needle) still compile unchanged.
//  - An empty needle now returns 0. Previously find("") matched at every
//    position and `found += substring.length()` added 0, so the loop never
//    advanced — an infinite loop.
int SubStringCounter(const std::string& str, const std::string& substring)
{
    if (substring.empty())
    {
        return 0;
    }
    std::size_t found = 0;
    int count = 0;
    // Resume the search just past the previous match so the same occurrence
    // is not counted twice (overlapping matches are intentionally skipped).
    while ((found = str.find(substring, found)) != std::string::npos)
    {
        ++count;
        found += substring.length();
    }
    return count;
}
126
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000127template<DataType ArmnnIType, DataType ArmnnOType,
128 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000129void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000130 const std::map<int, std::vector<TInput>>& inputTensorData,
131 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000134{
135 // Create runtime in which test will run
136 IRuntime::CreationOptions options;
137 IRuntimePtr runtime(IRuntime::Create(options));
138
139 // optimize the network
140 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141
142 // Loads it into the runtime.
143 NetworkId netId;
144 runtime->LoadNetwork(netId, std::move(optNet));
145
146 InputTensors inputTensors;
147 inputTensors.reserve(inputTensorData.size());
148 for (auto&& it : inputTensorData)
149 {
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152 }
153 OutputTensors outputTensors;
154 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000155 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000156 for (auto&& it : expectedOutputData)
157 {
kevmay012b4d88e2019-01-24 14:05:09 +0000158 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
163 }
164
165 // Does the inference.
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167
168 // Checks the results.
169 for (auto&& it : expectedOutputData)
170 {
kevmay012b4d88e2019-01-24 14:05:09 +0000171 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100172 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000173 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100175 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 }
narpra01b9546cf2018-11-20 15:21:28 +0000178 }
179}
180
David Monahan4f1e8e42019-09-04 09:22:10 +0100181inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100182{
183 using namespace armnn;
184
185 // Create runtime in which test will run
186 IRuntime::CreationOptions options;
187 IRuntimePtr runtime(armnn::IRuntime::Create(options));
188
189 // build up the structure of the network
190 INetworkPtr net(INetwork::Create());
191
192 IConnectableLayer* input = net->AddInputLayer(0);
193
David Monahan3fb7e102019-08-20 11:25:29 +0100194 ActivationDescriptor descriptor;
195 descriptor.m_Function = ActivationFunction::Square;
196 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100197
198 IConnectableLayer* output = net->AddOutputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100202
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100204 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100205
206 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000207 OptimizerOptions optimizedOptions;
208 optimizedOptions.m_ImportEnabled = true;
209 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100210 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100211
212 // Loads it into the runtime.
213 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100214 std::string ignoredErrorMessage;
215 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100216 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan4f1e8e42019-09-04 09:22:10 +0100217 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100218
219 // Creates structures for input & output
220 std::vector<float> inputData
221 {
David Monahan3fb7e102019-08-20 11:25:29 +0100222 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100223 };
224
225 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100226 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100227
David Monahan3fb7e102019-08-20 11:25:29 +0100228 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100229
David Monahan4f1e8e42019-09-04 09:22:10 +0100230 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100231 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100232
233 InputTensors inputTensors
234 {
235 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
236 };
237 OutputTensors outputTensors
238 {
239 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
240 };
241
David Monahan4f1e8e42019-09-04 09:22:10 +0100242 runtime->GetProfiler(netId)->EnableProfiling(true);
243
244 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100245 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100246}
247
Ferran Balaguer83239f92019-09-19 11:49:25 +0100248inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100249{
250 using namespace armnn;
251
252 // Create runtime in which test will run
253 IRuntime::CreationOptions options;
254 IRuntimePtr runtime(armnn::IRuntime::Create(options));
255
256 // build up the structure of the network
257 INetworkPtr net(INetwork::Create());
258
259 IConnectableLayer* input = net->AddInputLayer(0);
260
David Monahan3fb7e102019-08-20 11:25:29 +0100261 ActivationDescriptor descriptor;
262 descriptor.m_Function = ActivationFunction::Square;
263 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100264
265 IConnectableLayer* output = net->AddOutputLayer(0);
266
David Monahan3fb7e102019-08-20 11:25:29 +0100267 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
268 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100269
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100270 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100271 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100272
273 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000274 OptimizerOptions optimizedOptions;
275 optimizedOptions.m_ImportEnabled = true;
276 optimizedOptions.m_ExportEnabled = true;
277 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100278 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100279
280 // Loads it into the runtime.
281 NetworkId netId;
282 std::string ignoredErrorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100283 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100284 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100285 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
286
287 // Creates structures for input & output
288 std::vector<float> inputData
289 {
290 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
291 };
292
293 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100294 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100295
296 std::vector<float> outputData(5);
297
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100298 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100299 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100300
301 InputTensors inputTensors
302 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100303 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100304 };
305 OutputTensors outputTensors
306 {
307 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
308 };
309
Ferran Balaguer83239f92019-09-19 11:49:25 +0100310 // Do the inference and expect it to fail with a ExportMemoryException
311 if (backends[0] == Compute::CpuAcc)
312 {
313 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100314 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100315 }
316 else
317 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100318 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100319 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100320}
321
322inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
323{
324 using namespace armnn;
325
326 // Create runtime in which test will run
327 IRuntime::CreationOptions options;
328 IRuntimePtr runtime(armnn::IRuntime::Create(options));
329
330 // build up the structure of the network
331 INetworkPtr net(INetwork::Create());
332
333 IConnectableLayer* input = net->AddInputLayer(0);
334
David Monahan3fb7e102019-08-20 11:25:29 +0100335 ActivationDescriptor descriptor;
336 descriptor.m_Function = ActivationFunction::Square;
337 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100338
339 IConnectableLayer* output = net->AddOutputLayer(0);
340
David Monahan3fb7e102019-08-20 11:25:29 +0100341 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
342 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100343
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100344 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100345 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100346
347 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000348 OptimizerOptions optimizedOptions;
349 optimizedOptions.m_ImportEnabled = true;
350 optimizedOptions.m_ExportEnabled = true;
351 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100352 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100353
354 // Loads it into the runtime.
355 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100356 std::string ignoredErrorMessage;
357 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100358 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100359 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100360
361 // Creates structures for input & output
362 std::vector<float> inputData
363 {
364 1.0f, 2.0f, 3.0f, 4.0f
365 };
366
367 std::vector<float> outputData(4);
368
James Conroy57d10b72019-10-25 09:44:14 +0100369 std::vector<float> expectedOutput
370 {
371 1.0f, 4.0f, 9.0f, 16.0f
372 };
373
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100374 InputTensors inputTensors
375 {
376 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
377 };
378 OutputTensors outputTensors
379 {
380 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
381 };
382
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100383 runtime->GetProfiler(netId)->EnableProfiling(true);
384
385 // Do the inference
386 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
387
388 // Retrieve the Profiler.Print() output to get the workload execution
389 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
390 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000391 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100392 std::string dump = ss.str();
393
David Monahan3fb7e102019-08-20 11:25:29 +0100394 // Contains ActivationWorkload
395 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100397
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100398 // Contains SyncMemGeneric
399 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100400 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100401
Ferran Balaguer83239f92019-09-19 11:49:25 +0100402 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100403 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100404 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100405
406 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100407 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100408}
409
Ferran Balaguer83239f92019-09-19 11:49:25 +0100410inline void ImportOnlyWorkload(std::vector<BackendId> backends)
411{
412 using namespace armnn;
413
414 IRuntime::CreationOptions options;
415 IRuntimePtr runtime(IRuntime::Create(options));
416
417 // Builds up the structure of the network.
418 INetworkPtr net(INetwork::Create());
419
420 IConnectableLayer* input = net->AddInputLayer(0);
421
422 ActivationDescriptor descriptor;
423 descriptor.m_Function = ActivationFunction::Square;
424 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
425
426 IConnectableLayer* output = net->AddOutputLayer(0);
427
428 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
429 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
430
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100431 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100432 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
433
434 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000435 OptimizerOptions optimizedOptions;
436 optimizedOptions.m_ImportEnabled = true;
437 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100438
Sadik Armagan1625efc2021-06-10 18:24:34 +0100439 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100440 // Load it into the runtime. It should pass.
441 NetworkId netId;
442 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100443
444 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
445
Sadik Armagan1625efc2021-06-10 18:24:34 +0100446 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100447 == Status::Success);
448
Sadik Armagan1625efc2021-06-10 18:24:34 +0100449 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100450 // Creates structures for input & output
451 std::vector<float> inputData
452 {
453 1.0f, 2.0f, 3.0f, 4.0f
454 };
455
456 std::vector<float> outputData(4);
457
458 std::vector<float> expectedOutput
459 {
460 1.0f, 4.0f, 9.0f, 16.0f
461 };
462
David Monahan646bc8a2022-01-31 14:29:14 +0000463 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100464
Ferran Balaguer83239f92019-09-19 11:49:25 +0100465 InputTensors inputTensors
466 {
467 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
468 };
469 OutputTensors outputTensors
470 {
471 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
472 };
473
Sadik Armagan1625efc2021-06-10 18:24:34 +0100474 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100475 runtime->GetProfiler(netId)->EnableProfiling(true);
476
Sadik Armagan1625efc2021-06-10 18:24:34 +0100477 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100478 // Do the inference
479 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
480
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482 // Retrieve the Profiler.Print() output to get the workload execution
483 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
484 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000485 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100486 std::string dump = ss.str();
487
488 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100489 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100490 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100491 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100492
493 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100494 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100495 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100496 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100497
498 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100499 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100500}
501
502inline void ExportOnlyWorkload(std::vector<BackendId> backends)
503{
504 using namespace armnn;
505
506 IRuntime::CreationOptions options;
507 IRuntimePtr runtime(IRuntime::Create(options));
508
509 // Builds up the structure of the network.
510 INetworkPtr net(INetwork::Create());
511
512 IConnectableLayer* input = net->AddInputLayer(0);
513
514 ActivationDescriptor descriptor;
515 descriptor.m_Function = ActivationFunction::Square;
516 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
517
518 IConnectableLayer* output = net->AddOutputLayer(0);
519
520 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
521 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
522
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100523 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100524 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
525
526 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000527 OptimizerOptions optimizedOptions;
528 optimizedOptions.m_ExportEnabled = true;
529 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100530
Sadik Armagan1625efc2021-06-10 18:24:34 +0100531 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100532 // Load it into the runtime. It should pass.
533 NetworkId netId;
534 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100535 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100536 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100537 == Status::Success);
538
Sadik Armagan1625efc2021-06-10 18:24:34 +0100539 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100540 // Creates structures for input & output
541 std::vector<float> inputData
542 {
543 1.0f, 2.0f, 3.0f, 4.0f
544 };
545
546 std::vector<float> outputData(4);
547
548 std::vector<float> expectedOutput
549 {
550 1.0f, 4.0f, 9.0f, 16.0f
551 };
552
David Monahan646bc8a2022-01-31 14:29:14 +0000553 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100554
Ferran Balaguer83239f92019-09-19 11:49:25 +0100555 InputTensors inputTensors
556 {
557 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
558 };
559 OutputTensors outputTensors
560 {
561 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
562 };
563
Sadik Armagan1625efc2021-06-10 18:24:34 +0100564 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100565 runtime->GetProfiler(netId)->EnableProfiling(true);
566
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100568 // Do the inference
569 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
570
Sadik Armagan1625efc2021-06-10 18:24:34 +0100571 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100572 // Retrieve the Profiler.Print() output to get the workload execution
573 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
574 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000575 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100576 std::string dump = ss.str();
577
578 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100579 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100580 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100581 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100582
583 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100584 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100585 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100586 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100587
588 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100589 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100590}
591
592inline void ImportAndExportWorkload(std::vector<BackendId> backends)
593{
594 using namespace armnn;
595
596 IRuntime::CreationOptions options;
597 IRuntimePtr runtime(IRuntime::Create(options));
598
599 // Builds up the structure of the network.
600 INetworkPtr net(INetwork::Create());
601
602 IConnectableLayer* input = net->AddInputLayer(0);
603
604 ActivationDescriptor descriptor;
605 descriptor.m_Function = ActivationFunction::Square;
606 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
607
608 IConnectableLayer* output = net->AddOutputLayer(0);
609
610 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
611 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
612
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100613 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100614 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
615
Francis Murtagh626bd902022-06-21 13:16:23 +0000616 OptimizerOptions optimizedOptions;
617 optimizedOptions.m_ImportEnabled = true;
618 optimizedOptions.m_ExportEnabled = true;
619 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100620
Sadik Armagan1625efc2021-06-10 18:24:34 +0100621 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100622 // Load it into the runtime. It should pass.
623 NetworkId netId;
624 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100625
626 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
627
Sadik Armagan1625efc2021-06-10 18:24:34 +0100628 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100629 == Status::Success);
630
Sadik Armagan1625efc2021-06-10 18:24:34 +0100631 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100632 // Creates structures for input & output
633 std::vector<float> inputData
634 {
635 1.0f, 2.0f, 3.0f, 4.0f
636 };
637
638 std::vector<float> outputData(4);
639
640 std::vector<float> expectedOutput
641 {
642 1.0f, 4.0f, 9.0f, 16.0f
643 };
644
David Monahan646bc8a2022-01-31 14:29:14 +0000645 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100646
Ferran Balaguer83239f92019-09-19 11:49:25 +0100647 InputTensors inputTensors
648 {
649 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
650 };
651 OutputTensors outputTensors
652 {
653 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
654 };
655
Sadik Armagan1625efc2021-06-10 18:24:34 +0100656 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100657 runtime->GetProfiler(netId)->EnableProfiling(true);
658
Sadik Armagan1625efc2021-06-10 18:24:34 +0100659 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100660 // Do the inference
661 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
662
Sadik Armagan1625efc2021-06-10 18:24:34 +0100663 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100664 // Retrieve the Profiler.Print() output to get the workload execution
665 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
666 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000667 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100668 std::string dump = ss.str();
669
670 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100671 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100672 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100673 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100674
675 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100676 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100677 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100678 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100679
680 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100681 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100682}
683
inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    // Verifies that when a single output slot feeds more than one output layer the
    // runtime cannot export the tensor: the profiling dump must contain a
    // CopyMemGeneric workload and no SyncMemGeneric workload, while both outputs
    // still receive the correct squared values.
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network: Input -> Activation(Square) -> two Outputs.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    // The activation's single output slot is connected to BOTH output layers.
    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network with import and export enabled.
    OptimizerOptions optimizedOptions;
    optimizedOptions.m_ImportEnabled = true;
    optimizedOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    // Both outputs should hold the element-wise square of the input.
    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // Profiling is enabled so the workload names can be inspected below. The result
    // of the inference is not the main point; the absence of export workloads is.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    // The backend-specific activation workload must appear in the dump.
    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    CHECK(found != std::string::npos);
    // Must NOT contain SyncMemGeneric: export is not possible with two consumers.
    found = dump.find("SyncMemGeneric");
    CHECK(found == std::string::npos);
    // Must contain CopyMemGeneric: the outputs were copied instead of exported.
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check that the outputs are correct
    CHECK(std::equal(outputData0.begin(), outputData0.end(),
                     expectedOutput.begin(), expectedOutput.end()));
    CHECK(std::equal(outputData1.begin(), outputData1.end(),
                     expectedOutput.begin(), expectedOutput.end()));
}
790
David Monahan0a99a142020-03-13 07:52:54 +0000791inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
792{
793 using namespace armnn;
794
795 // Create runtime in which test will run
796 IRuntime::CreationOptions options;
797 IRuntimePtr runtime(armnn::IRuntime::Create(options));
798
799 // build up the structure of the network
800 INetworkPtr net(INetwork::Create());
801
802 IConnectableLayer* input = net->AddInputLayer(0);
803
804 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
805 // dim of the output to make it too small to hold the specified slice.
806 StridedSliceDescriptor descriptor;
807 descriptor.m_Begin = {0, 0};
808 descriptor.m_End = {2, 3};
809 descriptor.m_Stride = {1, 1};
810 descriptor.m_BeginMask = 0;
811 descriptor.m_EndMask = 0;
812 descriptor.m_ShrinkAxisMask = 1;
813 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
814
815 IConnectableLayer* output0 = net->AddOutputLayer(0);
816
817 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
818 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
819
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100820 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000821 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
822
823 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100824 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000825}
826
inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
{
    /**
     * This test is similar to the Import tests above: we create a network with a square function, pass in a
     * vector with 4 floats, square them and validate the output. We then check the profiling logs to see if
     * input/output tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
     * In this case both buffers are correctly aligned, so all inputs and outputs should be imported
     * (except on CpuAcc, which currently always copies - see the branch below).
     */
    using namespace armnn;
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network: Input -> Activation(Square) -> Output.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
    activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    INFO("Load Network");

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // MemorySource is left Undefined at load time; the buffers are imported
    // explicitly per-inference below via ImportInputs/ImportOutputs.
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
          == Status::Success);
    INFO("Generate Data");

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };
    std::vector<float> outputData(4);
    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    // Check our input and output pointers are actually aligned
    uintptr_t alignment = GetDataTypeSize(DataType::Float32);
    CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
    CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));

    INFO("Create Inference");
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);
    // Explicitly import both aligned buffers; each import is expected to succeed.
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    CHECK(importedInputIds.size() == 1);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
    CHECK(importedOutputIds.size() == 1);
    // Do the inference and force the import as the memory is aligned.
    // Empty InputTensors()/OutputTensors(): every tensor is supplied via the imported ids.
    runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    if (backends[0] == Compute::CpuAcc)
    {
        // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
        // reconfigure is implemented
        int count = SubStringCounter(dump, "SyncMemGeneric");
        CHECK(count == 0);
        // Should be 2 CopyMemGeneric workloads
        count = SubStringCounter(dump, "CopyMemGeneric");
        CHECK(count == 2);
    }
    else
    {
        // Check there is a SyncMemGeneric workload as we exported
        int count = SubStringCounter(dump, "SyncMemGeneric");
        CHECK(count == 1);
        // Shouldn't be any CopyMemGeneric workloads
        count = SubStringCounter(dump, "CopyMemGeneric");
        CHECK(count == 0);
    }
    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
}
925
inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
{
    /**
     * This test is similar to the Import tests above: we create a network with a square function, pass in a
     * vector with 4 floats, square them and validate the output. We then check the profiling logs to see if
     * input/output tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
     * In this case only the output should be imported: the input buffer is deliberately misaligned, so its
     * import must fail and the input is copied instead.
     */
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network: Input -> Activation(Square) -> Output.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
    activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    INFO("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
          == Status::Success);
    INFO("Generate Data");

    // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
    // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
    auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));

    float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);

    // Check if our pointer is truly misaligned
    uintptr_t alignment = GetDataTypeSize(DataType::Float32);
    CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);

    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Stage the input values into the misaligned buffer.
    std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));

    std::vector<float> outputData(4);
    // Check our output buffer is aligned
    CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    INFO("Create Inference");
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };
    runtime->GetProfiler(netId)->EnableProfiling(true);
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    // We expect the import to have failed.
    CHECK(importedInputIds.size() == 0);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
    CHECK(importedOutputIds.size() == 1);

    // Do the inference and force the import as the memory is misaligned.
    // The input is passed as a regular tensor (it was not imported); the output
    // goes through its imported id, hence the empty OutputTensors().
    runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
    // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
    // for imports/copies. Only that the output is correct.
    if (backends[0] != Compute::GpuAcc)
    {
        if (backends[0] == Compute::CpuAcc)
        {
            // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
            // reconfigure is implemented
            // We should get 0 SyncMemGeneric for the Output
            int count = SubStringCounter(dump, "SyncMemGeneric");
            CHECK(count == 0);
            // Should be 2 CopyMemGeneric as we copied the input
            count = SubStringCounter(dump, "CopyMemGeneric");
            CHECK(count == 2);
        }
        else
        {
            // We should get 1 SyncMemGeneric for the Output
            int count = SubStringCounter(dump, "SyncMemGeneric");
            CHECK(count == 1);
            // Should only be 1 CopyMemGeneric as we copied the input
            count = SubStringCounter(dump, "CopyMemGeneric");
            CHECK(count == 1);
        }
    }
    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
    std::free(memPtr);
}
1047
1048inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1049{
1050 /**
1051 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1052 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1053 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1054 * In this case all only the input should be imported
1055 */
1056 using namespace armnn;
1057
1058 IRuntime::CreationOptions options;
1059 IRuntimePtr runtime(IRuntime::Create(options));
1060
1061 // Builds up the structure of the network.
1062 INetworkPtr net(INetwork::Create());
1063 IConnectableLayer* input = net->AddInputLayer(0);
1064
1065 ActivationDescriptor descriptor;
1066 descriptor.m_Function = ActivationFunction::Square;
1067 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1068
1069 IConnectableLayer* output = net->AddOutputLayer(0);
1070
1071 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1072 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1073 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1074 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1075
1076 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1077 INFO("Load Network");
1078 // Load it into the runtime. It should pass.
1079 NetworkId netId;
1080 std::string ignoredErrorMessage;
1081 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1082 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1083 == Status::Success);
1084 INFO("Generate Data");
1085
1086 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1087 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1088 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1089
1090 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1091
1092 // Check if our pointer is truly misaligned
1093 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1094 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1095
1096 // Creates structures for input & output
1097 std::vector<float> inputData
1098 {
1099 1.0f, 2.0f, 3.0f, 4.0f
1100 };
1101
1102 // Check our input buffer is aligned
1103 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1104 std::vector<float> expectedOutput
1105 {
1106 1.0f, 4.0f, 9.0f, 16.0f
1107 };
1108
1109 INFO("Create Inference");
1110 InputTensors inputTensors
1111 {
1112 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1113 };
1114 OutputTensors outputTensors
1115 {
1116 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1117 };
1118 runtime->GetProfiler(netId)->EnableProfiling(true);
1119 std::vector<ImportedInputId> importedInputIds =
1120 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001121 CHECK(importedInputIds.size() == 1);
1122 // We expect this to fail.
David Monahan646bc8a2022-01-31 14:29:14 +00001123 std::vector<ImportedOutputId> importedOutputIds =
1124 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001125 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001126
Colm Doneland7ceec52022-07-06 12:09:05 +01001127 // Even if importing the output failed we still expect to be able to get it to work.
1128 runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001129
1130 // Retrieve the Profiler.Print() output to get the workload execution
1131 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1132 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001133 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001134 std::string dump = ss.str();
1135
1136 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1137 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1138 // for imports/copies. Only that the output is correct.
1139 if (backends[0] != Compute::GpuAcc)
1140 {
1141 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1142 int count = SubStringCounter(dump, "SyncMemGeneric");
1143 CHECK(count == 0);
1144 // Should only be 1 CopyMemGeneric as we copied the input
1145 count = SubStringCounter(dump, "CopyMemGeneric");
1146 if (backends[0] == Compute::CpuAcc)
1147 {
1148 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1149 // reconfigure is implemented
1150 CHECK(count == 2);
1151 }
1152 else
1153 {
1154 CHECK(count == 1);
1155 }
1156 // Check the output is correct
1157 }
1158 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001159 std::vector<float> outputData(expectedOutput.size(), 0);
1160 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001161 for (auto outputValue : expectedOutput)
1162 {
David Monahaneef6b762022-02-10 16:01:58 +00001163 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001164 ++index;
1165 }
1166 std::free(memPtr);
1167}
1168
1169inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1170{
1171 /**
1172 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1173 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1174 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1175 * In this case all inputs and outputs should be copied
1176 */
1177 using namespace armnn;
1178
1179 IRuntime::CreationOptions options;
1180 IRuntimePtr runtime(IRuntime::Create(options));
1181
1182 // Builds up the structure of the network.
1183 INetworkPtr net(INetwork::Create());
1184 IConnectableLayer* input = net->AddInputLayer(0);
1185
1186 ActivationDescriptor descriptor;
1187 descriptor.m_Function = ActivationFunction::Square;
1188 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1189
1190 IConnectableLayer* output = net->AddOutputLayer(0);
1191
1192 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1193 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1194 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1195 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1196
1197 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1198 INFO("Load Network");
1199 // Load it into the runtime. It should pass.
1200 NetworkId netId;
1201 std::string ignoredErrorMessage;
1202 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1203 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1204 == Status::Success);
1205 INFO("Generate Data");
1206
1207 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1208 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1209 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1210 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1211
1212 // Check if our pointer is truly misaligned
1213 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1214 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001215 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001216 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001217 1.0f, 2.0f, 3.0f, 4.0f
1218 };
1219 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001220
1221 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1222 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1223
1224 // Check if our pointer is truly misaligned
1225 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1226
1227 std::vector<float> expectedOutput
1228 {
1229 1.0f, 4.0f, 9.0f, 16.0f
1230 };
1231
1232 INFO("Create Inference");
1233 InputTensors inputTensors
1234 {
1235 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1236 };
1237 OutputTensors outputTensors
1238 {
1239 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1240 };
1241 runtime->GetProfiler(netId)->EnableProfiling(true);
1242 std::vector<ImportedInputId> importedInputIds =
1243 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001244 // Import should have failed.
1245 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001246 std::vector<ImportedOutputId> importedOutputIds =
1247 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001248 // Import should have failed.
1249 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001250
1251 // Do the inference and force the import as the memory is misaligned.
1252 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1253
1254 // Retrieve the Profiler.Print() output to get the workload execution
1255 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1256 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001257 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001258 std::string dump = ss.str();
1259
1260 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1261 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1262 // for imports/copies. Only that the output is correct.
1263 if (backends[0] != Compute::GpuAcc)
1264 {
1265 // We can only copy so there should be no SyncMemGeneric
1266 int count = SubStringCounter(dump, "SyncMemGeneric");
1267 CHECK(count == 0);
1268 // Should only be CopyMemGeneric workloads as we copied all buffers
1269 count = SubStringCounter(dump, "CopyMemGeneric");
1270 CHECK(count == 2);
1271 }
1272 // Check the output is correct
1273 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001274 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001275 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1276 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001277 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001278 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001279 ++index;
1280 }
1281 std::free(inputMemPtr);
1282 std::free(outputMemPtr);
1283}
1284
David Monahan16829712022-02-03 17:04:59 +00001285inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1286{
1287 /**
1288 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1289 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1290 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1291 * In this we create some aligned buffers, import them into a network and validate the output and number of
1292 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1293 * back to copying correctly.
1294 */
1295 using namespace armnn;
1296
1297 IRuntime::CreationOptions options;
1298 IRuntimePtr runtime(IRuntime::Create(options));
1299
1300 // Builds up the structure of the network.
1301 INetworkPtr net(INetwork::Create());
1302 IConnectableLayer* input = net->AddInputLayer(0);
1303
1304 ActivationDescriptor descriptor;
1305 descriptor.m_Function = ActivationFunction::Square;
1306 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1307
1308 IConnectableLayer* output = net->AddOutputLayer(0);
1309
1310 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1311 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1312 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1313 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1314
1315 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1316 INFO("Load Network");
1317 // Load it into the runtime. It should pass.
1318 NetworkId netId;
1319 std::string ignoredErrorMessage;
1320 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1321 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1322 == Status::Success);
1323 INFO("Generate Data");
1324
1325 // Creates structures for input & output
1326 std::vector<float> inputData
1327 {
1328 1.0f, 2.0f, 3.0f, 4.0f
1329 };
1330 std::vector<float> outputData(4);
1331 std::vector<float> expectedOutput
1332 {
1333 1.0f, 4.0f, 9.0f, 16.0f
1334 };
1335
1336 // Check our input and output pointers are actually aligned
1337 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1338 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1339 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1340
1341 INFO("Create Inference");
1342 InputTensors inputTensors
1343 {
1344 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1345 };
1346 OutputTensors outputTensors
1347 {
1348 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1349 };
1350
1351 runtime->GetProfiler(netId)->EnableProfiling(true);
1352 std::vector<ImportedInputId> importedInputIds =
1353 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001354 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001355 std::vector<ImportedOutputId> importedOutputIds =
1356 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001357 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001358 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001359 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001360
1361 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1362 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1363 std::stringstream ss;
1364 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1365 std::string dump = ss.str();
1366
1367 if (backends[0] == Compute::CpuAcc)
1368 {
1369 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1370 // reconfigure is implemented
1371 int count = SubStringCounter(dump, "SyncMemGeneric");
1372 CHECK(count == 0);
1373 // Should be 2 CopyMemGeneric workloads
1374 count = SubStringCounter(dump, "CopyMemGeneric");
1375 CHECK(count >= 1);
1376 }
1377 else
1378 {
1379 // Check there is at least 1 SyncMemGeneric workload as we exported
1380 int count = SubStringCounter(dump, "SyncMemGeneric");
1381 CHECK(count >= 1);
1382 // Shouldn't be any CopyMemGeneric workloads
1383 count = SubStringCounter(dump, "CopyMemGeneric");
1384 CHECK(count == 0);
1385 }
1386 // Check the output is correct
1387 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1388
1389 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1390 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1391 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1392 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1393
1394 // Check if our pointer is truly misaligned
1395 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001396
1397 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001398 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001399 2.0f, 3.0f, 4.0f, 5.0f
1400 };
1401
1402 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001403
1404 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1405 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1406
1407 // Check if our pointer is truly misaligned
1408 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1409
1410 std::vector<float> expectedMisalignedOutput
1411 {
1412 4.0f, 9.0f, 16.0f, 25.0f
1413 };
1414
1415 INFO("Create Second Inference");
1416 InputTensors inputTensorsMisaligned
1417 {
1418 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1419 };
1420 OutputTensors outputTensorsMisaligned
1421 {
1422 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1423 };
1424 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001425 // Import should fail.
1426 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001427 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001428 // Import should fail.
1429 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001430
1431 // Do the inference and force the import as the memory is misaligned.
1432 runtime->EnqueueWorkload(netId,
1433 inputTensorsMisaligned,
1434 outputTensorsMisaligned,
1435 importedInputIds,
1436 importedOutputIds);
1437
1438 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1439 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1440 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1441 dump = ss.str();
1442
1443 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1444 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1445 // for imports/copies. Only that the output is correct.
1446 if (backends[0] != Compute::GpuAcc)
1447 {
1448 // The SyncMemGeneric will still be in the profiling log from the first inference
1449 int count = SubStringCounter(dump, "SyncMemGeneric");
1450 CHECK(count >= 1);
1451 // We should now see CopyMemGeneric workloads as we copied all buffers
1452 count = SubStringCounter(dump, "CopyMemGeneric");
1453 CHECK(count >= 1);
1454 }
1455 // Check the output is correct
1456 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001457 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001458 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001459 for (auto outputValue : expectedMisalignedOutput)
1460 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001461 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001462 ++index;
1463 }
1464 // Clean up to avoid interfering with other tests
1465 runtime->UnloadNetwork(netId);
1466 std::free(inputMemPtr);
1467 std::free(outputMemPtr);
1468}
1469
1470
1471inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1472{
1473 /**
1474 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1475 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1476 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1477 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1478 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1479 * to importing correctly.
1480 */
1481 using namespace armnn;
1482
1483 IRuntime::CreationOptions options;
1484 IRuntimePtr runtime(IRuntime::Create(options));
1485
1486 // Builds up the structure of the network.
1487 INetworkPtr net(INetwork::Create());
1488 IConnectableLayer* input = net->AddInputLayer(0);
1489
1490 ActivationDescriptor descriptor;
1491 descriptor.m_Function = ActivationFunction::Square;
1492 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1493
1494 IConnectableLayer* output = net->AddOutputLayer(0);
1495
1496 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1497 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1498 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1499 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1500
1501 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1502 INFO("Load Network");
1503 // Load it into the runtime. It should pass.
1504 NetworkId netId;
1505 std::string ignoredErrorMessage;
1506 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1507 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1508 == Status::Success);
1509 INFO("Generate Data");
1510
1511 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1512 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1513 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1514 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1515
1516 // Check if our pointer is truly misaligned
1517 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1518 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001519 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001520 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001521 2.0f, 3.0f, 4.0f, 5.0f
1522 };
1523 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001524
1525 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1526 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1527
1528 // Check if our pointer is truly misaligned
1529 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1530
1531 std::vector<float> expectedMisalignedOutput
1532 {
1533 4.0f, 9.0f, 16.0f, 25.0f
1534 };
1535
1536 INFO("Create Second Inference");
1537 InputTensors inputTensorsMisaligned
1538 {
1539 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1540 };
1541 OutputTensors outputTensorsMisaligned
1542 {
1543 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1544 };
1545 runtime->GetProfiler(netId)->EnableProfiling(true);
1546 std::vector<ImportedInputId> importedInputIds =
1547 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001548 // Import should fail.
1549 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001550 std::vector<ImportedOutputId> importedOutputIds =
1551 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001552 // Import should fail.
1553 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001554
1555 // Do the inference and force the import as the memory is misaligned.
1556 runtime->EnqueueWorkload(netId,
1557 inputTensorsMisaligned,
1558 outputTensorsMisaligned,
1559 importedInputIds,
1560 importedOutputIds);
1561
1562 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1563 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1564 std::stringstream ss;
1565 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1566 std::string dump = ss.str();
1567
1568 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1569 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1570 // for imports/copies. Only that the output is correct.
1571 if (backends[0] != Compute::GpuAcc)
1572 {
1573 // We can only copy so there should be no SyncMemGeneric
1574 int count = SubStringCounter(dump, "SyncMemGeneric");
1575 CHECK(count == 0);
1576 // Should only be CopyMemGeneric workloads as we copied all buffers
1577 count = SubStringCounter(dump, "CopyMemGeneric");
1578 CHECK(count >= 1);
1579 }
1580 // Check the output is correct
1581 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001582 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1583 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001584 for (auto outputValue : expectedMisalignedOutput)
1585 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001586 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001587 ++index;
1588 }
1589 std::free(inputMemPtr);
1590 std::free(outputMemPtr);
1591
1592 // Creates structures for input & output
1593 std::vector<float> inputData
1594 {
1595 1.0f, 2.0f, 3.0f, 4.0f
1596 };
1597 std::vector<float> outputData(4);
1598 std::vector<float> expectedOutput
1599 {
1600 1.0f, 4.0f, 9.0f, 16.0f
1601 };
1602
1603 // Check our input and output pointers are actually aligned
1604 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1605 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1606
1607 INFO("Create Inference");
1608 InputTensors inputTensors
1609 {
1610 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1611 };
1612 OutputTensors outputTensors
1613 {
1614 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1615 };
1616
1617 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001618 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001619 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001620 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001621 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001622 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001623
1624 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1625 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1626 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1627 dump = ss.str();
1628
1629 if (backends[0] == Compute::CpuAcc)
1630 {
1631 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1632 // reconfigure is implemented
1633 int count = SubStringCounter(dump, "SyncMemGeneric");
1634 CHECK(count == 0);
1635 // Should be 2 CopyMemGeneric workloads
1636 count = SubStringCounter(dump, "CopyMemGeneric");
1637 CHECK(count >= 1);
1638 }
1639 else
1640 {
1641 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1642 // SyncMemGeneric Workload when we previously didn't
1643 int count = SubStringCounter(dump, "SyncMemGeneric");
1644 CHECK(count >= 1);
1645 // Should still be some CopyMemGeneric Workloads from the last inference
1646 count = SubStringCounter(dump, "CopyMemGeneric");
1647 CHECK(count >= 1);
1648 }
1649 // Check the output is correct
1650 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1651 // Clean up to avoid interfering with other tests
1652 runtime->UnloadNetwork(netId);
1653}
1654
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001655} // anonymous namespace