Blame - src/backends/backendsCommon/test/EndToEndTestImpl.hpp - ml/armnn

blob: cc5aa23ca35897fffc499c89f6ed5835cbb2ac49 [file] [log] [blame]

Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	1	//
				2	// Copyright © 2017 Arm Ltd. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5	#pragma once
				6
Sadik Armagan	a097d2a	2021-11-24 15:47:28 +0000	[diff] [blame]	7	#include <CommonTestUtils.hpp>
Mike Kelly	386ff1a	2021-03-29 15:04:50 +0100	[diff] [blame]	8
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	9	#include <armnn/Descriptors.hpp>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	10	#include <armnn/INetwork.hpp>
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	11	#include <armnn/IRuntime.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	12
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	13	#include <Profiling.hpp>
Colm Donelan	c42a987	2022-02-02 16:35:09 +0000	[diff] [blame]	14	#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	15	#include <ResolveType.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	16
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	17	#include <doctest/doctest.h>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	18
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	19	#include <vector>
				20
				21	namespace
				22	{
				23
				24	using namespace armnn;
				25
				26	template<typename T>
				27	bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
				28	const TensorInfo& commonTensorInfo,
				29	const std::vector<T>& inputData,
				30	const std::vector<T>& constantData,
				31	const std::vector<T>& expectedOutputData)
				32	{
				33	// Create runtime in which test will run
				34	IRuntime::CreationOptions options;
				35	IRuntimePtr runtime(IRuntime::Create(options));
				36
				37	// Builds up the structure of the network.
				38	INetworkPtr net(INetwork::Create());
				39
				40	IConnectableLayer* input = net->AddInputLayer(0);
				41	IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
				42	IConnectableLayer* add = net->AddAdditionLayer();
				43	IConnectableLayer* output = net->AddOutputLayer(0);
				44
				45	input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
				46	constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
				47	add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				48
				49	// Sets the tensors in the network.
				50	input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				51	constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				52	add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				53
				54	// optimize the network
				55	IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
				56
				57	// Loads it into the runtime.
				58	NetworkId netId;
				59	runtime->LoadNetwork(netId, std::move(optNet));
				60
				61	// Creates structures for input & output.
				62	std::vector<T> outputData(inputData.size());
				63
				64	InputTensors inputTensors
				65	{
				66	{0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
				67	};
				68	OutputTensors outputTensors
				69	{
				70	{0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				71	};
				72
				73	// Does the inference.
				74	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				75
				76	// Checks the results.
				77	return outputData == expectedOutputData;
				78	}
				79
				80	inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
				81	{
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	82	TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
				83	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	84
				85	return ConstantUsageTest(backends,
				86	commonTensorInfo,
				87	std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
				88	std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
				89	std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
				90	);
				91	}
				92
				93	inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
				94	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	95	TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	96
				97	const float scale = 0.023529f;
				98	const int8_t offset = -43;
				99
				100	commonTensorInfo.SetQuantizationScale(scale);
				101	commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	102	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	103
				104	return ConstantUsageTest(backends,
				105	commonTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	106	armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
				107	armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
				108	armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	109	);
				110	}
				111
// Utility function to find the number of non-overlapping instances of a substring within a string.
//
// string    - text to search (not modified).
// substring - text to look for.
//
// Returns the number of non-overlapping matches, scanning left to right.
// An empty substring is reported as zero occurrences.
int SubStringCounter(std::string& string, std::string&& substring)
{
    // std::string::find matches an empty needle at the current position and the cursor
    // would then advance by zero, so without this guard the loop would never terminate.
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search just past the previous match so the same occurrence is not counted twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
				126
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	127	template<DataType ArmnnIType, DataType ArmnnOType,
				128	typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	129	void EndToEndLayerTestImpl(INetworkPtr network,
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	130	const std::map<int, std::vector<TInput>>& inputTensorData,
				131	const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilers	bca73e1	2020-03-11 12:52:46 +0000	[diff] [blame]	132	std::vector<BackendId> backends,
				133	float tolerance = 0.000001f)
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	134	{
				135	// Create runtime in which test will run
				136	IRuntime::CreationOptions options;
				137	IRuntimePtr runtime(IRuntime::Create(options));
				138
				139	// optimize the network
				140	IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
				141
				142	// Loads it into the runtime.
				143	NetworkId netId;
				144	runtime->LoadNetwork(netId, std::move(optNet));
				145
				146	InputTensors inputTensors;
				147	inputTensors.reserve(inputTensorData.size());
				148	for (auto&& it : inputTensorData)
				149	{
				150	inputTensors.push_back({it.first,
				151	ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
				152	}
				153	OutputTensors outputTensors;
				154	outputTensors.reserve(expectedOutputData.size());
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	155	std::map<int, std::vector<TOutput>> outputStorage;
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	156	for (auto&& it : expectedOutputData)
				157	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	158	std::vector<TOutput> out(it.second.size());
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	159	outputStorage.emplace(it.first, out);
				160	outputTensors.push_back({it.first,
				161	Tensor(runtime->GetOutputTensorInfo(netId, it.first),
				162	outputStorage.at(it.first).data())});
				163	}
				164
				165	// Does the inference.
				166	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				167
				168	// Checks the results.
				169	for (auto&& it : expectedOutputData)
				170	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	171	std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tar	f97f6da	2019-10-01 18:35:44 +0100	[diff] [blame]	172	for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	173	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	174	CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin	2e3f4d2	2020-07-29 14:29:20 +0100	[diff] [blame]	175	"Actual output: " << out[i] << ". Expected output:" << it.second[i]);
				176
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	177	}
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	178	}
				179	}
				180
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	181	inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	182	{
				183	using namespace armnn;
				184
				185	// Create runtime in which test will run
				186	IRuntime::CreationOptions options;
				187	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				188
				189	// build up the structure of the network
				190	INetworkPtr net(INetwork::Create());
				191
				192	IConnectableLayer* input = net->AddInputLayer(0);
				193
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	194	ActivationDescriptor descriptor;
				195	descriptor.m_Function = ActivationFunction::Square;
				196	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	197
				198	IConnectableLayer* output = net->AddOutputLayer(0);
				199
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	200	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				201	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	202
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	203	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	204	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	205
				206	// Optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	207	OptimizerOptions optimizedOptions;
				208	optimizedOptions.m_ImportEnabled = true;
				209	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	210	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	211
				212	// Loads it into the runtime.
				213	NetworkId netId;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	214	std::string ignoredErrorMessage;
				215	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	216	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	217	runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	218
				219	// Creates structures for input & output
				220	std::vector<float> inputData
				221	{
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	222	1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	223	};
				224
				225	// Misaligned input
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	226	float* misalignedInputData = reinterpret_cast<float>(reinterpret_cast<char>(inputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	227
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	228	std::vector<float> outputData(4);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	229
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	230	// Aligned output
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	231	float* alignedOutputData = outputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	232
				233	InputTensors inputTensors
				234	{
				235	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
				236	};
				237	OutputTensors outputTensors
				238	{
				239	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
				240	};
				241
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	242	runtime->GetProfiler(netId)->EnableProfiling(true);
				243
				244	// Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	245	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	246	}
				247
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	248	inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	249	{
				250	using namespace armnn;
				251
				252	// Create runtime in which test will run
				253	IRuntime::CreationOptions options;
				254	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				255
				256	// build up the structure of the network
				257	INetworkPtr net(INetwork::Create());
				258
				259	IConnectableLayer* input = net->AddInputLayer(0);
				260
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	261	ActivationDescriptor descriptor;
				262	descriptor.m_Function = ActivationFunction::Square;
				263	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	264
				265	IConnectableLayer* output = net->AddOutputLayer(0);
				266
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	267	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				268	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	269
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	270	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	271	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	272
				273	// Optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	274	OptimizerOptions optimizedOptions;
				275	optimizedOptions.m_ImportEnabled = true;
				276	optimizedOptions.m_ExportEnabled = true;
				277	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	278	CHECK(optNet);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	279
				280	// Loads it into the runtime.
				281	NetworkId netId;
				282	std::string ignoredErrorMessage;
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	283	// Enable Importing and Exporting
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	284	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	285	runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
				286
				287	// Creates structures for input & output
				288	std::vector<float> inputData
				289	{
				290	1.0f, 2.0f, 3.0f, 4.0f, 5.0f
				291	};
				292
				293	// Aligned input
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	294	float* alignedInputData = inputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	295
				296	std::vector<float> outputData(5);
				297
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	298	// Misaligned output
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	299	float* misalignedOutputData = reinterpret_cast<float>(reinterpret_cast<char>(outputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	300
				301	InputTensors inputTensors
				302	{
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	303	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	304	};
				305	OutputTensors outputTensors
				306	{
				307	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
				308	};
				309
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	310	// Do the inference and expect it to fail with a ExportMemoryException
				311	if (backends[0] == Compute::CpuAcc)
				312	{
				313	// For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	314	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	315	}
				316	else
				317	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	318	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	319	}
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	320	}
				321
				322	inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
				323	{
				324	using namespace armnn;
				325
				326	// Create runtime in which test will run
				327	IRuntime::CreationOptions options;
				328	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				329
				330	// build up the structure of the network
				331	INetworkPtr net(INetwork::Create());
				332
				333	IConnectableLayer* input = net->AddInputLayer(0);
				334
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	335	ActivationDescriptor descriptor;
				336	descriptor.m_Function = ActivationFunction::Square;
				337	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	338
				339	IConnectableLayer* output = net->AddOutputLayer(0);
				340
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	341	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				342	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	343
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	344	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	345	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	346
				347	// Optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	348	OptimizerOptions optimizedOptions;
				349	optimizedOptions.m_ImportEnabled = true;
				350	optimizedOptions.m_ExportEnabled = true;
				351	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	352	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	353
				354	// Loads it into the runtime.
				355	NetworkId netId;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	356	std::string ignoredErrorMessage;
				357	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	358	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	359	runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	360
				361	// Creates structures for input & output
				362	std::vector<float> inputData
				363	{
				364	1.0f, 2.0f, 3.0f, 4.0f
				365	};
				366
				367	std::vector<float> outputData(4);
				368
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	369	std::vector<float> expectedOutput
				370	{
				371	1.0f, 4.0f, 9.0f, 16.0f
				372	};
				373
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	374	InputTensors inputTensors
				375	{
				376	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				377	};
				378	OutputTensors outputTensors
				379	{
				380	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				381	};
				382
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	383	runtime->GetProfiler(netId)->EnableProfiling(true);
				384
				385	// Do the inference
				386	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				387
				388	// Retrieve the Profiler.Print() output to get the workload execution
				389	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				390	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	391	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	392	std::string dump = ss.str();
				393
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	394	// Contains ActivationWorkload
				395	std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	396	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	397
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	398	// Contains SyncMemGeneric
				399	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	400	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	401
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	402	// Does not contain CopyMemGeneric
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	403	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	404	CHECK(found == std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	405
				406	// Check output is as expected
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	407	CHECK(outputData == expectedOutput);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	408	}
				409
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	410	inline void ImportOnlyWorkload(std::vector<BackendId> backends)
				411	{
				412	using namespace armnn;
				413
				414	IRuntime::CreationOptions options;
				415	IRuntimePtr runtime(IRuntime::Create(options));
				416
				417	// Builds up the structure of the network.
				418	INetworkPtr net(INetwork::Create());
				419
				420	IConnectableLayer* input = net->AddInputLayer(0);
				421
				422	ActivationDescriptor descriptor;
				423	descriptor.m_Function = ActivationFunction::Square;
				424	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				425
				426	IConnectableLayer* output = net->AddOutputLayer(0);
				427
				428	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				429	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				430
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	431	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	432	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				433
				434	// optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	435	OptimizerOptions optimizedOptions;
				436	optimizedOptions.m_ImportEnabled = true;
				437	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	438
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	439	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	440	// Load it into the runtime. It should pass.
				441	NetworkId netId;
				442	std::string ignoredErrorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	443
				444	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
				445
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	446	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	447	== Status::Success);
				448
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	449	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	450	// Creates structures for input & output
				451	std::vector<float> inputData
				452	{
				453	1.0f, 2.0f, 3.0f, 4.0f
				454	};
				455
				456	std::vector<float> outputData(4);
				457
				458	std::vector<float> expectedOutput
				459	{
				460	1.0f, 4.0f, 9.0f, 16.0f
				461	};
				462
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	463	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	464
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	465	InputTensors inputTensors
				466	{
				467	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				468	};
				469	OutputTensors outputTensors
				470	{
				471	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				472	};
				473
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	474	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	475	runtime->GetProfiler(netId)->EnableProfiling(true);
				476
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	477	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	478	// Do the inference
				479	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				480
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	481	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	482	// Retrieve the Profiler.Print() output to get the workload execution
				483	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				484	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	485	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	486	std::string dump = ss.str();
				487
				488	// Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	489	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	490	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	491	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	492
				493	// Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	494	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	495	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	496	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	497
				498	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	499	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	500	}
				501
				502	inline void ExportOnlyWorkload(std::vector<BackendId> backends)
				503	{
				504	using namespace armnn;
				505
				506	IRuntime::CreationOptions options;
				507	IRuntimePtr runtime(IRuntime::Create(options));
				508
				509	// Builds up the structure of the network.
				510	INetworkPtr net(INetwork::Create());
				511
				512	IConnectableLayer* input = net->AddInputLayer(0);
				513
				514	ActivationDescriptor descriptor;
				515	descriptor.m_Function = ActivationFunction::Square;
				516	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				517
				518	IConnectableLayer* output = net->AddOutputLayer(0);
				519
				520	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				521	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				522
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	523	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	524	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				525
				526	// optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	527	OptimizerOptions optimizedOptions;
				528	optimizedOptions.m_ExportEnabled = true;
				529	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	530
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	531	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	532	// Load it into the runtime. It should pass.
				533	NetworkId netId;
				534	std::string ignoredErrorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	535	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	536	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	537	== Status::Success);
				538
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	539	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	540	// Creates structures for input & output
				541	std::vector<float> inputData
				542	{
				543	1.0f, 2.0f, 3.0f, 4.0f
				544	};
				545
				546	std::vector<float> outputData(4);
				547
				548	std::vector<float> expectedOutput
				549	{
				550	1.0f, 4.0f, 9.0f, 16.0f
				551	};
				552
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	553	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	554
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	555	InputTensors inputTensors
				556	{
				557	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				558	};
				559	OutputTensors outputTensors
				560	{
				561	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				562	};
				563
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	564	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	565	runtime->GetProfiler(netId)->EnableProfiling(true);
				566
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	567	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	568	// Do the inference
				569	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				570
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	571	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	572	// Retrieve the Profiler.Print() output to get the workload execution
				573	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				574	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	575	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	576	std::string dump = ss.str();
				577
				578	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	579	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	580	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	581	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	582
				583	// Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	584	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	585	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	586	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	587
				588	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	589	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	590	}
				591
				592	inline void ImportAndExportWorkload(std::vector<BackendId> backends)
				593	{
				594	using namespace armnn;
				595
				596	IRuntime::CreationOptions options;
				597	IRuntimePtr runtime(IRuntime::Create(options));
				598
				599	// Builds up the structure of the network.
				600	INetworkPtr net(INetwork::Create());
				601
				602	IConnectableLayer* input = net->AddInputLayer(0);
				603
				604	ActivationDescriptor descriptor;
				605	descriptor.m_Function = ActivationFunction::Square;
				606	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				607
				608	IConnectableLayer* output = net->AddOutputLayer(0);
				609
				610	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				611	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				612
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	613	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	614	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				615
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	616	OptimizerOptions optimizedOptions;
				617	optimizedOptions.m_ImportEnabled = true;
				618	optimizedOptions.m_ExportEnabled = true;
				619	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	620
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	621	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	622	// Load it into the runtime. It should pass.
				623	NetworkId netId;
				624	std::string ignoredErrorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	625
				626	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
				627
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	628	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	629	== Status::Success);
				630
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	631	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	632	// Creates structures for input & output
				633	std::vector<float> inputData
				634	{
				635	1.0f, 2.0f, 3.0f, 4.0f
				636	};
				637
				638	std::vector<float> outputData(4);
				639
				640	std::vector<float> expectedOutput
				641	{
				642	1.0f, 4.0f, 9.0f, 16.0f
				643	};
				644
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	645	INFO("Create inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	646
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	647	InputTensors inputTensors
				648	{
				649	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				650	};
				651	OutputTensors outputTensors
				652	{
				653	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				654	};
				655
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	656	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	657	runtime->GetProfiler(netId)->EnableProfiling(true);
				658
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	659	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	660	// Do the inference
				661	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				662
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	663	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	664	// Retrieve the Profiler.Print() output to get the workload execution
				665	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				666	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	667	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	668	std::string dump = ss.str();
				669
				670	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	671	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	672	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	673	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	674
				675	// Shouldn't be any CopyMemGeneric workloads
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	676	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	677	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	678	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	679
				680	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	681	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	682	}
				683
				684	inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
				685	{
				686	using namespace armnn;
				687
				688	// Create runtime in which test will run
				689	IRuntime::CreationOptions options;
				690	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				691
				692	// build up the structure of the network
				693	INetworkPtr net(INetwork::Create());
				694
				695	IConnectableLayer* input = net->AddInputLayer(0);
				696
				697	ActivationDescriptor descriptor;
				698	descriptor.m_Function = ActivationFunction::Square;
				699	IConnectableLayer* activation = net->AddActivationLayer(descriptor);
				700
				701	IConnectableLayer* output0 = net->AddOutputLayer(0);
				702	IConnectableLayer* output1 = net->AddOutputLayer(1);
				703
				704	input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
				705	activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				706	activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
				707
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	708	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	709	activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
				710
				711	// Optimize the network
Colm Donelan	03bf98a	2022-05-30 15:20:36 +0100	[diff] [blame^]	712	OptimizerOptions optimizedOptions;
				713	optimizedOptions.m_ImportEnabled = true;
				714	optimizedOptions.m_ExportEnabled = true;
				715	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	716
				717	// Loads it into the runtime.
				718	NetworkId netId;
				719	std::string ignoredErrorMessage;
				720	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	721	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	722	runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
				723
				724	// Creates structures for input & output
				725	std::vector<float> inputData
				726	{
				727	1.0f, 2.0f, 3.0f, 4.0f
				728	};
				729
				730	std::vector<float> outputData0(4);
				731	std::vector<float> outputData1(4);
				732
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	733	std::vector<float> expectedOutput
				734	{
				735	1.0f, 4.0f, 9.0f, 16.0f
				736	};
				737
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	738	InputTensors inputTensors
				739	{
				740	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				741	};
				742	OutputTensors outputTensors
				743	{
				744	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
				745	{1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
				746	};
				747
				748	// The result of the inference is not important, just the fact that there
				749	// should not be CopyMemGeneric workloads.
				750	runtime->GetProfiler(netId)->EnableProfiling(true);
				751
				752	// Do the inference
				753	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				754
				755	// Retrieve the Profiler.Print() output to get the workload execution
				756	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				757	std::stringstream ss;
				758	profilerManager.GetProfiler()->Print(ss);
				759	std::string dump = ss.str();
				760
				761	std::size_t found = std::string::npos;
				762
				763	if (backends[0] == Compute::CpuRef)
				764	{
				765	found = dump.find("RefActivationWorkload");
				766	}
				767	else if (backends[0] == Compute::CpuAcc)
				768	{
				769	found = dump.find("NeonActivationWorkload");
				770	}
				771	else if (backends[0] == Compute::GpuAcc)
				772	{
				773	found = dump.find("ClActivationWorkload");
				774	}
				775
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	776	CHECK(found != std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	777	// No contains SyncMemGeneric
				778	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	779	CHECK(found == std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	780	// Contains CopyMemGeneric
				781	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	782	CHECK(found != std::string::npos);
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	783
				784	// Check that the outputs are correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	785	CHECK(std::equal(outputData0.begin(), outputData0.end(),
				786	expectedOutput.begin(), expectedOutput.end()));
				787	CHECK(std::equal(outputData1.begin(), outputData1.end(),
				788	expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	789	}
				790
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	791	inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
				792	{
				793	using namespace armnn;
				794
				795	// Create runtime in which test will run
				796	IRuntime::CreationOptions options;
				797	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				798
				799	// build up the structure of the network
				800	INetworkPtr net(INetwork::Create());
				801
				802	IConnectableLayer* input = net->AddInputLayer(0);
				803
				804	// Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
				805	// dim of the output to make it too small to hold the specified slice.
				806	StridedSliceDescriptor descriptor;
				807	descriptor.m_Begin = {0, 0};
				808	descriptor.m_End = {2, 3};
				809	descriptor.m_Stride = {1, 1};
				810	descriptor.m_BeginMask = 0;
				811	descriptor.m_EndMask = 0;
				812	descriptor.m_ShrinkAxisMask = 1;
				813	IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
				814
				815	IConnectableLayer* output0 = net->AddOutputLayer(0);
				816
				817	input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
				818	stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				819
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	820	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	821	stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
				822
				823	// Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	824	CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	825	}
				826
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	827	inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
				828	{
				829	/**
				830	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				831	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				832	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				833	* In this case all inputs and outputs should be imported
				834	*/
				835	using namespace armnn;
				836	IRuntime::CreationOptions options;
				837	IRuntimePtr runtime(IRuntime::Create(options));
				838
				839	// Builds up the structure of the network.
				840	INetworkPtr net(INetwork::Create());
				841	IConnectableLayer* input = net->AddInputLayer(0);
				842	ActivationDescriptor descriptor;
				843	descriptor.m_Function = ActivationFunction::Square;
				844	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				845	IConnectableLayer* output = net->AddOutputLayer(0);
				846	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				847	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				848	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				849	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				850	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				851	INFO("Load Network");
				852
				853	// Load it into the runtime. It should pass.
				854	NetworkId netId;
				855	std::string ignoredErrorMessage;
				856	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				857	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				858	== Status::Success);
				859	INFO("Generate Data");
				860
				861	// Creates structures for input & output
				862	std::vector<float> inputData
				863	{
				864	1.0f, 2.0f, 3.0f, 4.0f
				865	};
				866	std::vector<float> outputData(4);
				867	std::vector<float> expectedOutput
				868	{
				869	1.0f, 4.0f, 9.0f, 16.0f
				870	};
				871
				872	// Check our input and output pointers are actually aligned
				873	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				874	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				875	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				876
				877	INFO("Create Inference");
				878	InputTensors inputTensors
				879	{
				880	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				881	};
				882	OutputTensors outputTensors
				883	{
				884	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				885	};
				886
				887	runtime->GetProfiler(netId)->EnableProfiling(true);
				888	std::vector<ImportedInputId> importedInputIds =
				889	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				890	std::vector<ImportedOutputId> importedOutputIds =
				891	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				892	// Do the inference and force the import as the memory is aligned.
				893	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				894
				895	// Retrieve the Profiler.Print() output to get the workload execution
				896	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				897	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	898	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	899	std::string dump = ss.str();
				900
				901	if (backends[0] == Compute::CpuAcc)
				902	{
				903	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				904	// reconfigure is implemented
				905	int count = SubStringCounter(dump, "SyncMemGeneric");
				906	CHECK(count == 0);
				907	// Should be 2 CopyMemGeneric workloads
				908	count = SubStringCounter(dump, "CopyMemGeneric");
				909	CHECK(count == 2);
				910	}
				911	else
				912	{
				913	// Check there is a SyncMemGeneric workload as we exported
				914	int count = SubStringCounter(dump, "SyncMemGeneric");
				915	CHECK(count == 1);
				916	// Shouldn't be any CopyMemGeneric workloads
				917	count = SubStringCounter(dump, "CopyMemGeneric");
				918	CHECK(count == 0);
				919	}
				920	// Check the output is correct
				921	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				922	}
				923
				924	inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
				925	{
				926	/**
				927	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				928	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				929	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				930	* In this case all only the output should be imported
				931	*/
				932	using namespace armnn;
				933
				934	IRuntime::CreationOptions options;
				935	IRuntimePtr runtime(IRuntime::Create(options));
				936
				937	// Builds up the structure of the network.
				938	INetworkPtr net(INetwork::Create());
				939	IConnectableLayer* input = net->AddInputLayer(0);
				940
				941	ActivationDescriptor descriptor;
				942	descriptor.m_Function = ActivationFunction::Square;
				943	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				944
				945	IConnectableLayer* output = net->AddOutputLayer(0);
				946
				947	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				948	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				949	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				950	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				951
				952	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				953	INFO("Load Network");
				954	// Load it into the runtime. It should pass.
				955	NetworkId netId;
				956	std::string ignoredErrorMessage;
				957	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				958	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				959	== Status::Success);
				960	INFO("Generate Data");
				961
				962	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				963	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				964	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				965
				966	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				967
				968	// Check if our pointer is truly misaligned
				969	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				970	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				971
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	972	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	973	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	974	1.0f, 2.0f, 3.0f, 4.0f
				975	};
				976
				977	std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	978
				979	std::vector<float> outputData(4);
				980	// Check our output buffer is aligned
				981	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				982
				983	std::vector<float> expectedOutput
				984	{
				985	1.0f, 4.0f, 9.0f, 16.0f
				986	};
				987
				988	INFO("Create Inference");
				989	InputTensors inputTensors
				990	{
				991	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
				992	};
				993	OutputTensors outputTensors
				994	{
				995	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				996	};
				997	runtime->GetProfiler(netId)->EnableProfiling(true);
				998	std::vector<ImportedInputId> importedInputIds =
				999	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				1000	std::vector<ImportedOutputId> importedOutputIds =
				1001	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				1002
				1003	// Do the inference and force the import as the memory is misaligned.
				1004	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1005
				1006	// Retrieve the Profiler.Print() output to get the workload execution
				1007	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1008	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1009	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1010	std::string dump = ss.str();
				1011
				1012	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1013	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1014	// for imports/copies. Only that the output is correct.
				1015	if (backends[0] != Compute::GpuAcc)
				1016	{
				1017	if (backends[0] == Compute::CpuAcc)
				1018	{
				1019	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1020	// reconfigure is implemented
				1021	// We should get 0 SyncMemGeneric for the Output
				1022	int count = SubStringCounter(dump, "SyncMemGeneric");
				1023	CHECK(count == 0);
				1024	// Should be 2 CopyMemGeneric as we copied the input
				1025	count = SubStringCounter(dump, "CopyMemGeneric");
				1026	CHECK(count == 2);
				1027	}
				1028	else
				1029	{
				1030	// We should get 1 SyncMemGeneric for the Output
				1031	int count = SubStringCounter(dump, "SyncMemGeneric");
				1032	CHECK(count == 1);
				1033	// Should only be 1 CopyMemGeneric as we copied the input
				1034	count = SubStringCounter(dump, "CopyMemGeneric");
				1035	CHECK(count == 1);
				1036	}
				1037	}
				1038	// Check the output is correct
				1039	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1040	std::free(memPtr);
				1041	}
				1042
				1043	inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1044	{
				1045	/**
				1046	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1047	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1048	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1049	* In this case all only the input should be imported
				1050	*/
				1051	using namespace armnn;
				1052
				1053	IRuntime::CreationOptions options;
				1054	IRuntimePtr runtime(IRuntime::Create(options));
				1055
				1056	// Builds up the structure of the network.
				1057	INetworkPtr net(INetwork::Create());
				1058	IConnectableLayer* input = net->AddInputLayer(0);
				1059
				1060	ActivationDescriptor descriptor;
				1061	descriptor.m_Function = ActivationFunction::Square;
				1062	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1063
				1064	IConnectableLayer* output = net->AddOutputLayer(0);
				1065
				1066	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1067	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1068	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1069	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1070
				1071	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1072	INFO("Load Network");
				1073	// Load it into the runtime. It should pass.
				1074	NetworkId netId;
				1075	std::string ignoredErrorMessage;
				1076	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				1077	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				1078	== Status::Success);
				1079	INFO("Generate Data");
				1080
				1081	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1082	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1083	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1084
				1085	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				1086
				1087	// Check if our pointer is truly misaligned
				1088	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1089	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				1090
				1091	// Creates structures for input & output
				1092	std::vector<float> inputData
				1093	{
				1094	1.0f, 2.0f, 3.0f, 4.0f
				1095	};
				1096
				1097	// Check our input buffer is aligned
				1098	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1099	std::vector<float> expectedOutput
				1100	{
				1101	1.0f, 4.0f, 9.0f, 16.0f
				1102	};
				1103
				1104	INFO("Create Inference");
				1105	InputTensors inputTensors
				1106	{
				1107	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1108	};
				1109	OutputTensors outputTensors
				1110	{
				1111	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
				1112	};
				1113	runtime->GetProfiler(netId)->EnableProfiling(true);
				1114	std::vector<ImportedInputId> importedInputIds =
				1115	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				1116	std::vector<ImportedOutputId> importedOutputIds =
				1117	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				1118
				1119	// Do the inference and force the import as the memory is misaligned.
				1120	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1121
				1122	// Retrieve the Profiler.Print() output to get the workload execution
				1123	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1124	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1125	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1126	std::string dump = ss.str();
				1127
				1128	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1129	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1130	// for imports/copies. Only that the output is correct.
				1131	if (backends[0] != Compute::GpuAcc)
				1132	{
				1133	// Even though we Imported the Input we still shouldn't have a SyncMemGeneric
				1134	int count = SubStringCounter(dump, "SyncMemGeneric");
				1135	CHECK(count == 0);
				1136	// Should only be 1 CopyMemGeneric as we copied the input
				1137	count = SubStringCounter(dump, "CopyMemGeneric");
				1138	if (backends[0] == Compute::CpuAcc)
				1139	{
				1140	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1141	// reconfigure is implemented
				1142	CHECK(count == 2);
				1143	}
				1144	else
				1145	{
				1146	CHECK(count == 1);
				1147	}
				1148	// Check the output is correct
				1149	}
				1150	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1151	std::vector<float> outputData(expectedOutput.size(), 0);
				1152	std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1153	for (auto outputValue : expectedOutput)
				1154	{
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1155	CHECK(outputValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1156	++index;
				1157	}
				1158	std::free(memPtr);
				1159	}
				1160
				1161	inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1162	{
				1163	/**
				1164	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1165	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1166	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1167	* In this case all inputs and outputs should be copied
				1168	*/
				1169	using namespace armnn;
				1170
				1171	IRuntime::CreationOptions options;
				1172	IRuntimePtr runtime(IRuntime::Create(options));
				1173
				1174	// Builds up the structure of the network.
				1175	INetworkPtr net(INetwork::Create());
				1176	IConnectableLayer* input = net->AddInputLayer(0);
				1177
				1178	ActivationDescriptor descriptor;
				1179	descriptor.m_Function = ActivationFunction::Square;
				1180	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1181
				1182	IConnectableLayer* output = net->AddOutputLayer(0);
				1183
				1184	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1185	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1186	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1187	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1188
				1189	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1190	INFO("Load Network");
				1191	// Load it into the runtime. It should pass.
				1192	NetworkId netId;
				1193	std::string ignoredErrorMessage;
				1194	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				1195	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				1196	== Status::Success);
				1197	INFO("Generate Data");
				1198
				1199	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1200	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1201	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1202	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1203
				1204	// Check if our pointer is truly misaligned
				1205	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1206	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1207	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1208	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1209	1.0f, 2.0f, 3.0f, 4.0f
				1210	};
				1211	std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1212
				1213	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1214	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1215
				1216	// Check if our pointer is truly misaligned
				1217	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1218
				1219	std::vector<float> expectedOutput
				1220	{
				1221	1.0f, 4.0f, 9.0f, 16.0f
				1222	};
				1223
				1224	INFO("Create Inference");
				1225	InputTensors inputTensors
				1226	{
				1227	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1228	};
				1229	OutputTensors outputTensors
				1230	{
				1231	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1232	};
				1233	runtime->GetProfiler(netId)->EnableProfiling(true);
				1234	std::vector<ImportedInputId> importedInputIds =
				1235	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				1236	std::vector<ImportedOutputId> importedOutputIds =
				1237	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				1238
				1239	// Do the inference and force the import as the memory is misaligned.
				1240	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1241
				1242	// Retrieve the Profiler.Print() output to get the workload execution
				1243	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1244	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1245	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1246	std::string dump = ss.str();
				1247
				1248	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1249	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1250	// for imports/copies. Only that the output is correct.
				1251	if (backends[0] != Compute::GpuAcc)
				1252	{
				1253	// We can only copy so there should be no SyncMemGeneric
				1254	int count = SubStringCounter(dump, "SyncMemGeneric");
				1255	CHECK(count == 0);
				1256	// Should only be CopyMemGeneric workloads as we copied all buffers
				1257	count = SubStringCounter(dump, "CopyMemGeneric");
				1258	CHECK(count == 2);
				1259	}
				1260	// Check the output is correct
				1261	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1262	std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1263	std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
				1264	for (auto expectedValue : expectedOutput)
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1265	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1266	CHECK(expectedValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1267	++index;
				1268	}
				1269	std::free(inputMemPtr);
				1270	std::free(outputMemPtr);
				1271	}
				1272
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1273	inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
				1274	{
				1275	/**
				1276	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1277	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1278	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1279	* In this we create some aligned buffers, import them into a network and validate the output and number of
				1280	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
				1281	* back to copying correctly.
				1282	*/
				1283	using namespace armnn;
				1284
				1285	IRuntime::CreationOptions options;
				1286	IRuntimePtr runtime(IRuntime::Create(options));
				1287
				1288	// Builds up the structure of the network.
				1289	INetworkPtr net(INetwork::Create());
				1290	IConnectableLayer* input = net->AddInputLayer(0);
				1291
				1292	ActivationDescriptor descriptor;
				1293	descriptor.m_Function = ActivationFunction::Square;
				1294	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1295
				1296	IConnectableLayer* output = net->AddOutputLayer(0);
				1297
				1298	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1299	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1300	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1301	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1302
				1303	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1304	INFO("Load Network");
				1305	// Load it into the runtime. It should pass.
				1306	NetworkId netId;
				1307	std::string ignoredErrorMessage;
				1308	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				1309	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				1310	== Status::Success);
				1311	INFO("Generate Data");
				1312
				1313	// Creates structures for input & output
				1314	std::vector<float> inputData
				1315	{
				1316	1.0f, 2.0f, 3.0f, 4.0f
				1317	};
				1318	std::vector<float> outputData(4);
				1319	std::vector<float> expectedOutput
				1320	{
				1321	1.0f, 4.0f, 9.0f, 16.0f
				1322	};
				1323
				1324	// Check our input and output pointers are actually aligned
				1325	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1326	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1327	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1328
				1329	INFO("Create Inference");
				1330	InputTensors inputTensors
				1331	{
				1332	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1333	};
				1334	OutputTensors outputTensors
				1335	{
				1336	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1337	};
				1338
				1339	runtime->GetProfiler(netId)->EnableProfiling(true);
				1340	std::vector<ImportedInputId> importedInputIds =
				1341	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				1342	std::vector<ImportedOutputId> importedOutputIds =
				1343	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				1344	// Do the inference and force the import as the memory is aligned.
				1345	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1346
				1347	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1348	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1349	std::stringstream ss;
				1350	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1351	std::string dump = ss.str();
				1352
				1353	if (backends[0] == Compute::CpuAcc)
				1354	{
				1355	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1356	// reconfigure is implemented
				1357	int count = SubStringCounter(dump, "SyncMemGeneric");
				1358	CHECK(count == 0);
				1359	// Should be 2 CopyMemGeneric workloads
				1360	count = SubStringCounter(dump, "CopyMemGeneric");
				1361	CHECK(count >= 1);
				1362	}
				1363	else
				1364	{
				1365	// Check there is at least 1 SyncMemGeneric workload as we exported
				1366	int count = SubStringCounter(dump, "SyncMemGeneric");
				1367	CHECK(count >= 1);
				1368	// Shouldn't be any CopyMemGeneric workloads
				1369	count = SubStringCounter(dump, "CopyMemGeneric");
				1370	CHECK(count == 0);
				1371	}
				1372	// Check the output is correct
				1373	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1374
				1375	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1376	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1377	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1378	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1379
				1380	// Check if our pointer is truly misaligned
				1381	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1382
				1383	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1384	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1385	2.0f, 3.0f, 4.0f, 5.0f
				1386	};
				1387
				1388	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1389
				1390	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1391	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1392
				1393	// Check if our pointer is truly misaligned
				1394	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1395
				1396	std::vector<float> expectedMisalignedOutput
				1397	{
				1398	4.0f, 9.0f, 16.0f, 25.0f
				1399	};
				1400
				1401	INFO("Create Second Inference");
				1402	InputTensors inputTensorsMisaligned
				1403	{
				1404	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1405	};
				1406	OutputTensors outputTensorsMisaligned
				1407	{
				1408	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1409	};
				1410	importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
				1411	importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
				1412
				1413	// Do the inference and force the import as the memory is misaligned.
				1414	runtime->EnqueueWorkload(netId,
				1415	inputTensorsMisaligned,
				1416	outputTensorsMisaligned,
				1417	importedInputIds,
				1418	importedOutputIds);
				1419
				1420	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1421	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1422	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1423	dump = ss.str();
				1424
				1425	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1426	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1427	// for imports/copies. Only that the output is correct.
				1428	if (backends[0] != Compute::GpuAcc)
				1429	{
				1430	// The SyncMemGeneric will still be in the profiling log from the first inference
				1431	int count = SubStringCounter(dump, "SyncMemGeneric");
				1432	CHECK(count >= 1);
				1433	// We should now see CopyMemGeneric workloads as we copied all buffers
				1434	count = SubStringCounter(dump, "CopyMemGeneric");
				1435	CHECK(count >= 1);
				1436	}
				1437	// Check the output is correct
				1438	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1439	std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1440	std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1441	for (auto outputValue : expectedMisalignedOutput)
				1442	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1443	CHECK(outputValue == alignedOutputData[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1444	++index;
				1445	}
				1446	// Clean up to avoid interfering with other tests
				1447	runtime->UnloadNetwork(netId);
				1448	std::free(inputMemPtr);
				1449	std::free(outputMemPtr);
				1450	}
				1451
				1452
				1453	inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
				1454	{
				1455	/**
				1456	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1457	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1458	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1459	* In this we create some misaligned buffers, copy them into a network and validate the output and number of
				1460	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
				1461	* to importing correctly.
				1462	*/
				1463	using namespace armnn;
				1464
				1465	IRuntime::CreationOptions options;
				1466	IRuntimePtr runtime(IRuntime::Create(options));
				1467
				1468	// Builds up the structure of the network.
				1469	INetworkPtr net(INetwork::Create());
				1470	IConnectableLayer* input = net->AddInputLayer(0);
				1471
				1472	ActivationDescriptor descriptor;
				1473	descriptor.m_Function = ActivationFunction::Square;
				1474	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1475
				1476	IConnectableLayer* output = net->AddOutputLayer(0);
				1477
				1478	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1479	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1480	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1481	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1482
				1483	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1484	INFO("Load Network");
				1485	// Load it into the runtime. It should pass.
				1486	NetworkId netId;
				1487	std::string ignoredErrorMessage;
				1488	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
				1489	CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
				1490	== Status::Success);
				1491	INFO("Generate Data");
				1492
				1493	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1494	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1495	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1496	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1497
				1498	// Check if our pointer is truly misaligned
				1499	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1500	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1501	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1502	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1503	2.0f, 3.0f, 4.0f, 5.0f
				1504	};
				1505	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1506
				1507	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1508	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1509
				1510	// Check if our pointer is truly misaligned
				1511	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1512
				1513	std::vector<float> expectedMisalignedOutput
				1514	{
				1515	4.0f, 9.0f, 16.0f, 25.0f
				1516	};
				1517
				1518	INFO("Create Second Inference");
				1519	InputTensors inputTensorsMisaligned
				1520	{
				1521	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1522	};
				1523	OutputTensors outputTensorsMisaligned
				1524	{
				1525	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1526	};
				1527	runtime->GetProfiler(netId)->EnableProfiling(true);
				1528	std::vector<ImportedInputId> importedInputIds =
				1529	runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
				1530	std::vector<ImportedOutputId> importedOutputIds =
				1531	runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
				1532
				1533	// Do the inference and force the import as the memory is misaligned.
				1534	runtime->EnqueueWorkload(netId,
				1535	inputTensorsMisaligned,
				1536	outputTensorsMisaligned,
				1537	importedInputIds,
				1538	importedOutputIds);
				1539
				1540	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1541	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1542	std::stringstream ss;
				1543	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1544	std::string dump = ss.str();
				1545
				1546	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1547	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1548	// for imports/copies. Only that the output is correct.
				1549	if (backends[0] != Compute::GpuAcc)
				1550	{
				1551	// We can only copy so there should be no SyncMemGeneric
				1552	int count = SubStringCounter(dump, "SyncMemGeneric");
				1553	CHECK(count == 0);
				1554	// Should only be CopyMemGeneric workloads as we copied all buffers
				1555	count = SubStringCounter(dump, "CopyMemGeneric");
				1556	CHECK(count >= 1);
				1557	}
				1558	// Check the output is correct
				1559	unsigned int index = 0;
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1560	std::vector<float> alignedOutput(expectedMisalignedOutput.size());
				1561	std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1562	for (auto outputValue : expectedMisalignedOutput)
				1563	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1564	CHECK(outputValue == alignedOutput[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1565	++index;
				1566	}
				1567	std::free(inputMemPtr);
				1568	std::free(outputMemPtr);
				1569
				1570	// Creates structures for input & output
				1571	std::vector<float> inputData
				1572	{
				1573	1.0f, 2.0f, 3.0f, 4.0f
				1574	};
				1575	std::vector<float> outputData(4);
				1576	std::vector<float> expectedOutput
				1577	{
				1578	1.0f, 4.0f, 9.0f, 16.0f
				1579	};
				1580
				1581	// Check our input and output pointers are actually aligned
				1582	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1583	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1584
				1585	INFO("Create Inference");
				1586	InputTensors inputTensors
				1587	{
				1588	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1589	};
				1590	OutputTensors outputTensors
				1591	{
				1592	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1593	};
				1594
				1595	importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
				1596	importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
				1597	// Do the inference and force the import as the memory is aligned.
				1598	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1599
				1600	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1601	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1602	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1603	dump = ss.str();
				1604
				1605	if (backends[0] == Compute::CpuAcc)
				1606	{
				1607	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1608	// reconfigure is implemented
				1609	int count = SubStringCounter(dump, "SyncMemGeneric");
				1610	CHECK(count == 0);
				1611	// Should be 2 CopyMemGeneric workloads
				1612	count = SubStringCounter(dump, "CopyMemGeneric");
				1613	CHECK(count >= 1);
				1614	}
				1615	else
				1616	{
				1617	// Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
				1618	// SyncMemGeneric Workload when we previously didn't
				1619	int count = SubStringCounter(dump, "SyncMemGeneric");
				1620	CHECK(count >= 1);
				1621	// Should still be some CopyMemGeneric Workloads from the last inference
				1622	count = SubStringCounter(dump, "CopyMemGeneric");
				1623	CHECK(count >= 1);
				1624	}
				1625	// Check the output is correct
				1626	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1627	// Clean up to avoid interfering with other tests
				1628	runtime->UnloadNetwork(netId);
				1629	}
				1630
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	1631	} // anonymous namespace