Blame - src/backends/backendsCommon/test/EndToEndTestImpl.hpp - ml/armnn

blob: bd5466ac04b732889ef89070ea5db1e9c052a656 [file] [log] [blame]

Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	1	//
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	2	// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	3	// SPDX-License-Identifier: MIT
				4	//
				5	#pragma once
				6
Sadik Armagan	a097d2a	2021-11-24 15:47:28 +0000	[diff] [blame]	7	#include <CommonTestUtils.hpp>
Mike Kelly	386ff1a	2021-03-29 15:04:50 +0100	[diff] [blame]	8
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	9	#include <armnn/Descriptors.hpp>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	10	#include <armnn/INetwork.hpp>
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	11	#include <armnn/IRuntime.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	12
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	13	#include <Profiling.hpp>
Colm Donelan	c42a987	2022-02-02 16:35:09 +0000	[diff] [blame]	14	#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	15	#include <ResolveType.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	16
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	17	#include <doctest/doctest.h>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	18
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	19	#include <vector>
				20
				21	namespace
				22	{
				23
				24	using namespace armnn;
				25
				26	template<typename T>
				27	bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
				28	const TensorInfo& commonTensorInfo,
				29	const std::vector<T>& inputData,
				30	const std::vector<T>& constantData,
				31	const std::vector<T>& expectedOutputData)
				32	{
				33	// Create runtime in which test will run
				34	IRuntime::CreationOptions options;
				35	IRuntimePtr runtime(IRuntime::Create(options));
				36
				37	// Builds up the structure of the network.
				38	INetworkPtr net(INetwork::Create());
				39
				40	IConnectableLayer* input = net->AddInputLayer(0);
				41	IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
Mike Kelly	2c14db6	2023-03-15 15:06:23 +0000	[diff] [blame]	42	ARMNN_NO_DEPRECATE_WARN_BEGIN
Mike Kelly	1a05aad	2023-03-31 18:00:00 +0100	[diff] [blame]	43	IConnectableLayer* add = net->AddAdditionLayer();
Mike Kelly	2c14db6	2023-03-15 15:06:23 +0000	[diff] [blame]	44	ARMNN_NO_DEPRECATE_WARN_END
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	45	IConnectableLayer* output = net->AddOutputLayer(0);
				46
				47	input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
				48	constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
				49	add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				50
				51	// Sets the tensors in the network.
				52	input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				53	constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				54	add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				55
				56	// optimize the network
				57	IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
				58
				59	// Loads it into the runtime.
				60	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	61	std::string errorMessage;
				62	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
				63	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	64
				65	// Creates structures for input & output.
				66	std::vector<T> outputData(inputData.size());
				67
				68	InputTensors inputTensors
				69	{
				70	{0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
				71	};
				72	OutputTensors outputTensors
				73	{
				74	{0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				75	};
				76
				77	// Does the inference.
				78	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				79
				80	// Checks the results.
				81	return outputData == expectedOutputData;
				82	}
				83
				84	inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
				85	{
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	86	TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
				87	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	88
				89	return ConstantUsageTest(backends,
				90	commonTensorInfo,
				91	std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
				92	std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
				93	std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
				94	);
				95	}
				96
				97	inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
				98	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	99	TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	100
				101	const float scale = 0.023529f;
				102	const int8_t offset = -43;
				103
				104	commonTensorInfo.SetQuantizationScale(scale);
				105	commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	106	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	107
				108	return ConstantUsageTest(backends,
				109	commonTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	110	armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
				111	armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
				112	armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	113	);
				114	}
				115
/// Counts the non-overlapping occurrences of a substring within a string.
///
/// The search restarts immediately after each match, so overlapping matches are not counted
/// (for example "aa" occurs twice in "aaaa").
///
/// @param string    Text to search; it is only read.
/// @param substring Text to look for.
/// @return Number of matches found; 0 when substring is empty.
inline int SubStringCounter(std::string& string, std::string&& substring)
{
    // An empty needle matches at every offset without advancing the search position,
    // so the loop below would never terminate. Report no occurrences instead.
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search just past the previous match to avoid finding the same substring twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
				130
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	131	template<DataType ArmnnIType, DataType ArmnnOType,
				132	typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	133	void EndToEndLayerTestImpl(INetworkPtr network,
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	134	const std::map<int, std::vector<TInput>>& inputTensorData,
				135	const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilers	bca73e1	2020-03-11 12:52:46 +0000	[diff] [blame]	136	std::vector<BackendId> backends,
				137	float tolerance = 0.000001f)
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	138	{
				139	// Create runtime in which test will run
				140	IRuntime::CreationOptions options;
				141	IRuntimePtr runtime(IRuntime::Create(options));
				142
				143	// optimize the network
				144	IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
				145
				146	// Loads it into the runtime.
				147	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	148	std::string errorMessage;
				149	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
				150	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	151
				152	InputTensors inputTensors;
				153	inputTensors.reserve(inputTensorData.size());
				154	for (auto&& it : inputTensorData)
				155	{
				156	inputTensors.push_back({it.first,
				157	ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
				158	}
				159	OutputTensors outputTensors;
				160	outputTensors.reserve(expectedOutputData.size());
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	161	std::map<int, std::vector<TOutput>> outputStorage;
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	162	for (auto&& it : expectedOutputData)
				163	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	164	std::vector<TOutput> out(it.second.size());
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	165	outputStorage.emplace(it.first, out);
				166	outputTensors.push_back({it.first,
				167	Tensor(runtime->GetOutputTensorInfo(netId, it.first),
				168	outputStorage.at(it.first).data())});
				169	}
				170
				171	// Does the inference.
				172	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				173
				174	// Checks the results.
				175	for (auto&& it : expectedOutputData)
				176	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	177	std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tar	f97f6da	2019-10-01 18:35:44 +0100	[diff] [blame]	178	for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	179	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	180	CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Mike Kelly	1a05aad	2023-03-31 18:00:00 +0100	[diff] [blame]	181	"Actual output: " << out[i] << ". Expected output:" << it.second[i]);
Teresa Charlin	2e3f4d2	2020-07-29 14:29:20 +0100	[diff] [blame]	182
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	183	}
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	184	}
				185	}
				186
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	187	inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	188	{
				189	using namespace armnn;
				190
				191	// Create runtime in which test will run
				192	IRuntime::CreationOptions options;
				193	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				194
				195	// build up the structure of the network
				196	INetworkPtr net(INetwork::Create());
				197
				198	IConnectableLayer* input = net->AddInputLayer(0);
				199
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	200	ActivationDescriptor descriptor;
				201	descriptor.m_Function = ActivationFunction::Square;
				202	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	203
				204	IConnectableLayer* output = net->AddOutputLayer(0);
				205
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	206	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				207	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	208
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	209	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	210	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	211
				212	// Optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	213	OptimizerOptionsOpaque optimizedOptions;
				214	optimizedOptions.SetImportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	215	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	216	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	217
				218	// Loads it into the runtime.
				219	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	220	std::string errorMessage;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	221	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	222	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	223	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				224	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	225
				226	// Creates structures for input & output
				227	std::vector<float> inputData
				228	{
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	229	1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	230	};
				231
				232	// Misaligned input
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	233	float* misalignedInputData = reinterpret_cast<float>(reinterpret_cast<char>(inputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	234
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	235	std::vector<float> outputData(4);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	236
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	237	// Aligned output
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	238	float* alignedOutputData = outputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	239
				240	InputTensors inputTensors
				241	{
				242	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
				243	};
				244	OutputTensors outputTensors
				245	{
				246	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
				247	};
				248
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	249	runtime->GetProfiler(netId)->EnableProfiling(true);
				250
				251	// Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	252	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	253	}
				254
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	255	inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	256	{
				257	using namespace armnn;
				258
				259	// Create runtime in which test will run
				260	IRuntime::CreationOptions options;
				261	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				262
				263	// build up the structure of the network
				264	INetworkPtr net(INetwork::Create());
				265
				266	IConnectableLayer* input = net->AddInputLayer(0);
				267
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	268	ActivationDescriptor descriptor;
				269	descriptor.m_Function = ActivationFunction::Square;
				270	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	271
				272	IConnectableLayer* output = net->AddOutputLayer(0);
				273
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	274	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				275	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	276
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	277	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	278	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	279
				280	// Optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	281	OptimizerOptionsOpaque optimizedOptions;
				282	optimizedOptions.SetImportEnabled(true);
				283	optimizedOptions.SetExportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	284	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	285	CHECK(optNet);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	286
				287	// Loads it into the runtime.
				288	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	289	std::string errorMessage;
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	290	// Enable Importing and Exporting
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	291	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	292	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				293	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	294
				295	// Creates structures for input & output
				296	std::vector<float> inputData
				297	{
				298	1.0f, 2.0f, 3.0f, 4.0f, 5.0f
				299	};
				300
				301	// Aligned input
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	302	float* alignedInputData = inputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	303
				304	std::vector<float> outputData(5);
				305
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	306	// Misaligned output
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	307	float* misalignedOutputData = reinterpret_cast<float>(reinterpret_cast<char>(outputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	308
				309	InputTensors inputTensors
				310	{
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	311	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	312	};
				313	OutputTensors outputTensors
				314	{
				315	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
				316	};
				317
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	318	// Do the inference and expect it to fail with a ExportMemoryException
				319	if (backends[0] == Compute::CpuAcc)
				320	{
				321	// For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	322	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	323	}
				324	else
				325	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	326	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	327	}
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	328	}
				329
				330	inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
				331	{
				332	using namespace armnn;
				333
				334	// Create runtime in which test will run
				335	IRuntime::CreationOptions options;
				336	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				337
				338	// build up the structure of the network
				339	INetworkPtr net(INetwork::Create());
				340
				341	IConnectableLayer* input = net->AddInputLayer(0);
				342
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	343	ActivationDescriptor descriptor;
				344	descriptor.m_Function = ActivationFunction::Square;
				345	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	346
				347	IConnectableLayer* output = net->AddOutputLayer(0);
				348
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	349	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				350	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	351
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	352	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	353	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	354
				355	// Optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	356	OptimizerOptionsOpaque optimizedOptions;
				357	optimizedOptions.SetImportEnabled(true);
				358	optimizedOptions.SetExportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	359	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	360	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	361
				362	// Loads it into the runtime.
				363	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	364	std::string errorMessage;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	365	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	366	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	367	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				368	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	369
				370	// Creates structures for input & output
				371	std::vector<float> inputData
				372	{
				373	1.0f, 2.0f, 3.0f, 4.0f
				374	};
				375
				376	std::vector<float> outputData(4);
				377
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	378	std::vector<float> expectedOutput
				379	{
				380	1.0f, 4.0f, 9.0f, 16.0f
				381	};
				382
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	383	InputTensors inputTensors
				384	{
				385	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				386	};
				387	OutputTensors outputTensors
				388	{
				389	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				390	};
				391
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	392	runtime->GetProfiler(netId)->EnableProfiling(true);
				393
				394	// Do the inference
				395	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				396
				397	// Retrieve the Profiler.Print() output to get the workload execution
				398	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				399	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	400	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	401	std::string dump = ss.str();
				402
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	403	// Contains ActivationWorkload
				404	std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	405	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	406
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	407	// Contains SyncMemGeneric
				408	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	409	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	410
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	411	// Does not contain CopyMemGeneric
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	412	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	413	CHECK(found == std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	414
				415	// Check output is as expected
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	416	CHECK(outputData == expectedOutput);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	417	}
				418
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	419	inline void ImportOnlyWorkload(std::vector<BackendId> backends)
				420	{
				421	using namespace armnn;
				422
				423	IRuntime::CreationOptions options;
				424	IRuntimePtr runtime(IRuntime::Create(options));
				425
				426	// Builds up the structure of the network.
				427	INetworkPtr net(INetwork::Create());
				428
				429	IConnectableLayer* input = net->AddInputLayer(0);
				430
				431	ActivationDescriptor descriptor;
				432	descriptor.m_Function = ActivationFunction::Square;
				433	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				434
				435	IConnectableLayer* output = net->AddOutputLayer(0);
				436
				437	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				438	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				439
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	440	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	441	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				442
				443	// optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	444	OptimizerOptionsOpaque optimizedOptions;
				445	optimizedOptions.SetImportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	446	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	447
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	448	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	449	// Load it into the runtime. It should pass.
				450	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	451	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	452	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	453	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				454	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	455
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	456	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	457	// Creates structures for input & output
				458	std::vector<float> inputData
				459	{
				460	1.0f, 2.0f, 3.0f, 4.0f
				461	};
				462
				463	std::vector<float> outputData(4);
				464
				465	std::vector<float> expectedOutput
				466	{
				467	1.0f, 4.0f, 9.0f, 16.0f
				468	};
				469
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	470	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	471
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	472	InputTensors inputTensors
				473	{
				474	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				475	};
				476	OutputTensors outputTensors
				477	{
				478	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				479	};
				480
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	481	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	482	runtime->GetProfiler(netId)->EnableProfiling(true);
				483
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	484	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	485	// Do the inference
				486	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				487
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	488	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	489	// Retrieve the Profiler.Print() output to get the workload execution
				490	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				491	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	492	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	493	std::string dump = ss.str();
				494
				495	// Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	496	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	497	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	498	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	499
				500	// Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	501	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	502	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	503	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	504
				505	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	506	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	507	}
				508
				509	inline void ExportOnlyWorkload(std::vector<BackendId> backends)
				510	{
				511	using namespace armnn;
				512
				513	IRuntime::CreationOptions options;
				514	IRuntimePtr runtime(IRuntime::Create(options));
				515
				516	// Builds up the structure of the network.
				517	INetworkPtr net(INetwork::Create());
				518
				519	IConnectableLayer* input = net->AddInputLayer(0);
				520
				521	ActivationDescriptor descriptor;
				522	descriptor.m_Function = ActivationFunction::Square;
				523	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				524
				525	IConnectableLayer* output = net->AddOutputLayer(0);
				526
				527	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				528	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				529
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	530	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	531	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				532
				533	// optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	534	OptimizerOptionsOpaque optimizedOptions;
				535	optimizedOptions.SetExportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	536	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	537
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	538	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	539	// Load it into the runtime. It should pass.
				540	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	541	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	542	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	543	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				544	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	545
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	546	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	547	// Creates structures for input & output
				548	std::vector<float> inputData
				549	{
				550	1.0f, 2.0f, 3.0f, 4.0f
				551	};
				552
				553	std::vector<float> outputData(4);
				554
				555	std::vector<float> expectedOutput
				556	{
				557	1.0f, 4.0f, 9.0f, 16.0f
				558	};
				559
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	560	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	561
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	562	InputTensors inputTensors
				563	{
				564	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				565	};
				566	OutputTensors outputTensors
				567	{
				568	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				569	};
				570
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	571	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	572	runtime->GetProfiler(netId)->EnableProfiling(true);
				573
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	574	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	575	// Do the inference
				576	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				577
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	578	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	579	// Retrieve the Profiler.Print() output to get the workload execution
				580	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				581	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	582	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	583	std::string dump = ss.str();
				584
				585	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	586	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	587	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	588	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	589
				590	// Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	591	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	592	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	593	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	594
				595	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	596	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	597	}
				598
				599	inline void ImportAndExportWorkload(std::vector<BackendId> backends)
				600	{
				601	using namespace armnn;
				602
				603	IRuntime::CreationOptions options;
				604	IRuntimePtr runtime(IRuntime::Create(options));
				605
				606	// Builds up the structure of the network.
				607	INetworkPtr net(INetwork::Create());
				608
				609	IConnectableLayer* input = net->AddInputLayer(0);
				610
				611	ActivationDescriptor descriptor;
				612	descriptor.m_Function = ActivationFunction::Square;
				613	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				614
				615	IConnectableLayer* output = net->AddOutputLayer(0);
				616
				617	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				618	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				619
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	620	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	621	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				622
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	623	OptimizerOptionsOpaque optimizedOptions;
				624	optimizedOptions.SetImportEnabled(true);
				625	optimizedOptions.SetExportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	626	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	627
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	628	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	629	// Load it into the runtime. It should pass.
				630	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	631	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	632	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	633	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				634	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	635
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	636	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	637	// Creates structures for input & output
				638	std::vector<float> inputData
				639	{
				640	1.0f, 2.0f, 3.0f, 4.0f
				641	};
				642
				643	std::vector<float> outputData(4);
				644
				645	std::vector<float> expectedOutput
				646	{
				647	1.0f, 4.0f, 9.0f, 16.0f
				648	};
				649
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	650	INFO("Create inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	651
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	652	InputTensors inputTensors
				653	{
				654	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				655	};
				656	OutputTensors outputTensors
				657	{
				658	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				659	};
				660
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	661	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	662	runtime->GetProfiler(netId)->EnableProfiling(true);
				663
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	664	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	665	// Do the inference
				666	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				667
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	668	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	669	// Retrieve the Profiler.Print() output to get the workload execution
				670	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				671	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	672	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	673	std::string dump = ss.str();
				674
				675	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	676	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	677	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	678	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	679
				680	// Shouldn't be any CopyMemGeneric workloads
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	681	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	682	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	683	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	684
				685	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	686	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	687	}
				688
				689	inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
				690	{
				691	using namespace armnn;
				692
				693	// Create runtime in which test will run
				694	IRuntime::CreationOptions options;
				695	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				696
				697	// build up the structure of the network
				698	INetworkPtr net(INetwork::Create());
				699
				700	IConnectableLayer* input = net->AddInputLayer(0);
				701
				702	ActivationDescriptor descriptor;
				703	descriptor.m_Function = ActivationFunction::Square;
				704	IConnectableLayer* activation = net->AddActivationLayer(descriptor);
				705
				706	IConnectableLayer* output0 = net->AddOutputLayer(0);
				707	IConnectableLayer* output1 = net->AddOutputLayer(1);
				708
				709	input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
				710	activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				711	activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
				712
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	713	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	714	activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
				715
				716	// Optimize the network
John Mcloughlin	c5ee0d7	2023-03-24 12:07:25 +0000	[diff] [blame]	717	OptimizerOptionsOpaque optimizedOptions;
				718	optimizedOptions.SetImportEnabled(true);
				719	optimizedOptions.SetExportEnabled(true);
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	720	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	721
				722	// Loads it into the runtime.
				723	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	724	std::string errorMessage;
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	725	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	726	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	727	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				728	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	729
				730	// Creates structures for input & output
				731	std::vector<float> inputData
				732	{
				733	1.0f, 2.0f, 3.0f, 4.0f
				734	};
				735
				736	std::vector<float> outputData0(4);
				737	std::vector<float> outputData1(4);
				738
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	739	std::vector<float> expectedOutput
				740	{
				741	1.0f, 4.0f, 9.0f, 16.0f
				742	};
				743
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	744	InputTensors inputTensors
				745	{
				746	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				747	};
				748	OutputTensors outputTensors
				749	{
				750	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
				751	{1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
				752	};
				753
				754	// The result of the inference is not important, just the fact that there
				755	// should not be CopyMemGeneric workloads.
				756	runtime->GetProfiler(netId)->EnableProfiling(true);
				757
				758	// Do the inference
				759	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				760
				761	// Retrieve the Profiler.Print() output to get the workload execution
				762	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				763	std::stringstream ss;
				764	profilerManager.GetProfiler()->Print(ss);
				765	std::string dump = ss.str();
				766
				767	std::size_t found = std::string::npos;
				768
				769	if (backends[0] == Compute::CpuRef)
				770	{
				771	found = dump.find("RefActivationWorkload");
				772	}
				773	else if (backends[0] == Compute::CpuAcc)
				774	{
				775	found = dump.find("NeonActivationWorkload");
				776	}
				777	else if (backends[0] == Compute::GpuAcc)
				778	{
				779	found = dump.find("ClActivationWorkload");
				780	}
				781
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	782	CHECK(found != std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	783	// No contains SyncMemGeneric
				784	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	785	CHECK(found == std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	786	// Contains CopyMemGeneric
				787	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	788	CHECK(found != std::string::npos);
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	789
				790	// Check that the outputs are correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	791	CHECK(std::equal(outputData0.begin(), outputData0.end(),
				792	expectedOutput.begin(), expectedOutput.end()));
				793	CHECK(std::equal(outputData1.begin(), outputData1.end(),
				794	expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	795	}
				796
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	797	inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
				798	{
				799	using namespace armnn;
				800
				801	// Create runtime in which test will run
				802	IRuntime::CreationOptions options;
				803	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				804
				805	// build up the structure of the network
				806	INetworkPtr net(INetwork::Create());
				807
				808	IConnectableLayer* input = net->AddInputLayer(0);
				809
				810	// Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
				811	// dim of the output to make it too small to hold the specified slice.
				812	StridedSliceDescriptor descriptor;
				813	descriptor.m_Begin = {0, 0};
				814	descriptor.m_End = {2, 3};
				815	descriptor.m_Stride = {1, 1};
				816	descriptor.m_BeginMask = 0;
				817	descriptor.m_EndMask = 0;
				818	descriptor.m_ShrinkAxisMask = 1;
				819	IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
				820
				821	IConnectableLayer* output0 = net->AddOutputLayer(0);
				822
				823	input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
				824	stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				825
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	826	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	827	stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
				828
				829	// Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	830	CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	831	}
				832
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	833	inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
				834	{
				835	/**
				836	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				837	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				838	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				839	* In this case all inputs and outputs should be imported
				840	*/
				841	using namespace armnn;
				842	IRuntime::CreationOptions options;
				843	IRuntimePtr runtime(IRuntime::Create(options));
				844
				845	// Builds up the structure of the network.
				846	INetworkPtr net(INetwork::Create());
				847	IConnectableLayer* input = net->AddInputLayer(0);
				848	ActivationDescriptor descriptor;
				849	descriptor.m_Function = ActivationFunction::Square;
				850	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				851	IConnectableLayer* output = net->AddOutputLayer(0);
				852	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				853	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				854	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				855	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				856	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				857	INFO("Load Network");
				858
				859	// Load it into the runtime. It should pass.
				860	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	861	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	862	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	863	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				864	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				865
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	866	INFO("Generate Data");
				867
				868	// Creates structures for input & output
				869	std::vector<float> inputData
				870	{
				871	1.0f, 2.0f, 3.0f, 4.0f
				872	};
				873	std::vector<float> outputData(4);
				874	std::vector<float> expectedOutput
				875	{
				876	1.0f, 4.0f, 9.0f, 16.0f
				877	};
				878
				879	// Check our input and output pointers are actually aligned
				880	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				881	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				882	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				883
				884	INFO("Create Inference");
				885	InputTensors inputTensors
				886	{
				887	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				888	};
				889	OutputTensors outputTensors
				890	{
				891	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				892	};
				893
				894	runtime->GetProfiler(netId)->EnableProfiling(true);
				895	std::vector<ImportedInputId> importedInputIds =
				896	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	897	CHECK(importedInputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	898	std::vector<ImportedOutputId> importedOutputIds =
				899	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	900	CHECK(importedOutputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	901	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	902	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	903
				904	// Retrieve the Profiler.Print() output to get the workload execution
				905	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				906	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	907	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	908	std::string dump = ss.str();
				909
				910	if (backends[0] == Compute::CpuAcc)
				911	{
				912	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				913	// reconfigure is implemented
				914	int count = SubStringCounter(dump, "SyncMemGeneric");
				915	CHECK(count == 0);
				916	// Should be 2 CopyMemGeneric workloads
				917	count = SubStringCounter(dump, "CopyMemGeneric");
				918	CHECK(count == 2);
				919	}
				920	else
				921	{
				922	// Check there is a SyncMemGeneric workload as we exported
				923	int count = SubStringCounter(dump, "SyncMemGeneric");
				924	CHECK(count == 1);
				925	// Shouldn't be any CopyMemGeneric workloads
				926	count = SubStringCounter(dump, "CopyMemGeneric");
				927	CHECK(count == 0);
				928	}
				929	// Check the output is correct
				930	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				931	}
				932
				933	inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
				934	{
				935	/**
				936	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				937	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				938	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				939	* In this case all only the output should be imported
				940	*/
				941	using namespace armnn;
				942
				943	IRuntime::CreationOptions options;
				944	IRuntimePtr runtime(IRuntime::Create(options));
				945
				946	// Builds up the structure of the network.
				947	INetworkPtr net(INetwork::Create());
				948	IConnectableLayer* input = net->AddInputLayer(0);
				949
				950	ActivationDescriptor descriptor;
				951	descriptor.m_Function = ActivationFunction::Square;
				952	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				953
				954	IConnectableLayer* output = net->AddOutputLayer(0);
				955
				956	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				957	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				958	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				959	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				960
				961	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				962	INFO("Load Network");
				963	// Load it into the runtime. It should pass.
				964	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	965	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	966	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	967	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				968	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				969
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	970	INFO("Generate Data");
				971
				972	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				973	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				974	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				975
				976	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				977
				978	// Check if our pointer is truly misaligned
				979	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				980	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				981
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	982	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	983	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	984	1.0f, 2.0f, 3.0f, 4.0f
				985	};
				986
				987	std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	988
				989	std::vector<float> outputData(4);
				990	// Check our output buffer is aligned
				991	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				992
				993	std::vector<float> expectedOutput
				994	{
				995	1.0f, 4.0f, 9.0f, 16.0f
				996	};
				997
				998	INFO("Create Inference");
				999	InputTensors inputTensors
				1000	{
				1001	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
				1002	};
				1003	OutputTensors outputTensors
				1004	{
				1005	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1006	};
				1007	runtime->GetProfiler(netId)->EnableProfiling(true);
				1008	std::vector<ImportedInputId> importedInputIds =
				1009	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1010	// We expect the import to have failed.
				1011	CHECK(importedInputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1012	std::vector<ImportedOutputId> importedOutputIds =
				1013	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1014	CHECK(importedOutputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1015
				1016	// Do the inference and force the import as the memory is misaligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1017	runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1018
				1019	// Retrieve the Profiler.Print() output to get the workload execution
				1020	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1021	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1022	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1023	std::string dump = ss.str();
				1024
				1025	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1026	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1027	// for imports/copies. Only that the output is correct.
				1028	if (backends[0] != Compute::GpuAcc)
				1029	{
				1030	if (backends[0] == Compute::CpuAcc)
				1031	{
				1032	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1033	// reconfigure is implemented
				1034	// We should get 0 SyncMemGeneric for the Output
				1035	int count = SubStringCounter(dump, "SyncMemGeneric");
				1036	CHECK(count == 0);
				1037	// Should be 2 CopyMemGeneric as we copied the input
				1038	count = SubStringCounter(dump, "CopyMemGeneric");
				1039	CHECK(count == 2);
				1040	}
				1041	else
				1042	{
				1043	// We should get 1 SyncMemGeneric for the Output
				1044	int count = SubStringCounter(dump, "SyncMemGeneric");
				1045	CHECK(count == 1);
				1046	// Should only be 1 CopyMemGeneric as we copied the input
				1047	count = SubStringCounter(dump, "CopyMemGeneric");
				1048	CHECK(count == 1);
				1049	}
				1050	}
				1051	// Check the output is correct
				1052	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1053	std::free(memPtr);
				1054	}
				1055
				1056	inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1057	{
				1058	/**
				1059	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1060	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1061	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1062	* In this case all only the input should be imported
				1063	*/
				1064	using namespace armnn;
				1065
				1066	IRuntime::CreationOptions options;
				1067	IRuntimePtr runtime(IRuntime::Create(options));
				1068
				1069	// Builds up the structure of the network.
				1070	INetworkPtr net(INetwork::Create());
				1071	IConnectableLayer* input = net->AddInputLayer(0);
				1072
				1073	ActivationDescriptor descriptor;
				1074	descriptor.m_Function = ActivationFunction::Square;
				1075	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1076
				1077	IConnectableLayer* output = net->AddOutputLayer(0);
				1078
				1079	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1080	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1081	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1082	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1083
				1084	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1085	INFO("Load Network");
				1086	// Load it into the runtime. It should pass.
				1087	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1088	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1089	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1090	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1091	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				1092
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1093	INFO("Generate Data");
				1094
				1095	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1096	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1097	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1098
				1099	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				1100
				1101	// Check if our pointer is truly misaligned
				1102	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1103	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				1104
				1105	// Creates structures for input & output
				1106	std::vector<float> inputData
				1107	{
				1108	1.0f, 2.0f, 3.0f, 4.0f
				1109	};
				1110
				1111	// Check our input buffer is aligned
				1112	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1113	std::vector<float> expectedOutput
				1114	{
				1115	1.0f, 4.0f, 9.0f, 16.0f
				1116	};
				1117
				1118	INFO("Create Inference");
				1119	InputTensors inputTensors
				1120	{
				1121	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1122	};
				1123	OutputTensors outputTensors
				1124	{
				1125	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
				1126	};
				1127	runtime->GetProfiler(netId)->EnableProfiling(true);
				1128	std::vector<ImportedInputId> importedInputIds =
				1129	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1130	CHECK(importedInputIds.size() == 1);
				1131	// We expect this to fail.
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1132	std::vector<ImportedOutputId> importedOutputIds =
				1133	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1134	CHECK(importedOutputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1135
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1136	// Even if importing the output failed we still expect to be able to get it to work.
				1137	runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1138
				1139	// Retrieve the Profiler.Print() output to get the workload execution
				1140	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1141	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1142	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1143	std::string dump = ss.str();
				1144
				1145	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1146	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1147	// for imports/copies. Only that the output is correct.
				1148	if (backends[0] != Compute::GpuAcc)
				1149	{
				1150	// Even though we Imported the Input we still shouldn't have a SyncMemGeneric
				1151	int count = SubStringCounter(dump, "SyncMemGeneric");
				1152	CHECK(count == 0);
				1153	// Should only be 1 CopyMemGeneric as we copied the input
				1154	count = SubStringCounter(dump, "CopyMemGeneric");
				1155	if (backends[0] == Compute::CpuAcc)
				1156	{
				1157	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1158	// reconfigure is implemented
				1159	CHECK(count == 2);
				1160	}
				1161	else
				1162	{
				1163	CHECK(count == 1);
				1164	}
				1165	// Check the output is correct
				1166	}
				1167	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1168	std::vector<float> outputData(expectedOutput.size(), 0);
				1169	std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1170	for (auto outputValue : expectedOutput)
				1171	{
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1172	CHECK(outputValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1173	++index;
				1174	}
				1175	std::free(memPtr);
				1176	}
				1177
				1178	inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1179	{
				1180	/**
				1181	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1182	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1183	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1184	* In this case all inputs and outputs should be copied
				1185	*/
				1186	using namespace armnn;
				1187
				1188	IRuntime::CreationOptions options;
				1189	IRuntimePtr runtime(IRuntime::Create(options));
				1190
				1191	// Builds up the structure of the network.
				1192	INetworkPtr net(INetwork::Create());
				1193	IConnectableLayer* input = net->AddInputLayer(0);
				1194
				1195	ActivationDescriptor descriptor;
				1196	descriptor.m_Function = ActivationFunction::Square;
				1197	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1198
				1199	IConnectableLayer* output = net->AddOutputLayer(0);
				1200
				1201	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1202	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1203	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1204	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1205
				1206	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1207	INFO("Load Network");
				1208	// Load it into the runtime. It should pass.
				1209	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1210	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1211	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1212	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1213	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1214	INFO("Generate Data");
				1215
				1216	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1217	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1218	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1219	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1220
				1221	// Check if our pointer is truly misaligned
				1222	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1223	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1224	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1225	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1226	1.0f, 2.0f, 3.0f, 4.0f
				1227	};
				1228	std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1229
				1230	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1231	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1232
				1233	// Check if our pointer is truly misaligned
				1234	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1235
				1236	std::vector<float> expectedOutput
				1237	{
				1238	1.0f, 4.0f, 9.0f, 16.0f
				1239	};
				1240
				1241	INFO("Create Inference");
				1242	InputTensors inputTensors
				1243	{
				1244	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1245	};
				1246	OutputTensors outputTensors
				1247	{
				1248	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1249	};
				1250	runtime->GetProfiler(netId)->EnableProfiling(true);
				1251	std::vector<ImportedInputId> importedInputIds =
				1252	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1253	// Import should have failed.
				1254	CHECK(importedInputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1255	std::vector<ImportedOutputId> importedOutputIds =
				1256	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1257	// Import should have failed.
				1258	CHECK(importedOutputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1259
				1260	// Do the inference and force the import as the memory is misaligned.
				1261	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1262
				1263	// Retrieve the Profiler.Print() output to get the workload execution
				1264	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1265	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1266	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1267	std::string dump = ss.str();
				1268
				1269	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1270	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1271	// for imports/copies. Only that the output is correct.
				1272	if (backends[0] != Compute::GpuAcc)
				1273	{
				1274	// We can only copy so there should be no SyncMemGeneric
				1275	int count = SubStringCounter(dump, "SyncMemGeneric");
				1276	CHECK(count == 0);
				1277	// Should only be CopyMemGeneric workloads as we copied all buffers
				1278	count = SubStringCounter(dump, "CopyMemGeneric");
				1279	CHECK(count == 2);
				1280	}
				1281	// Check the output is correct
				1282	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1283	std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1284	std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
				1285	for (auto expectedValue : expectedOutput)
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1286	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1287	CHECK(expectedValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1288	++index;
				1289	}
				1290	std::free(inputMemPtr);
				1291	std::free(outputMemPtr);
				1292	}
				1293
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1294	inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
				1295	{
				1296	/**
				1297	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1298	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1299	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1300	* In this we create some aligned buffers, import them into a network and validate the output and number of
				1301	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
				1302	* back to copying correctly.
				1303	*/
				1304	using namespace armnn;
				1305
				1306	IRuntime::CreationOptions options;
				1307	IRuntimePtr runtime(IRuntime::Create(options));
				1308
				1309	// Builds up the structure of the network.
				1310	INetworkPtr net(INetwork::Create());
				1311	IConnectableLayer* input = net->AddInputLayer(0);
				1312
				1313	ActivationDescriptor descriptor;
				1314	descriptor.m_Function = ActivationFunction::Square;
				1315	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1316
				1317	IConnectableLayer* output = net->AddOutputLayer(0);
				1318
				1319	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1320	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1321	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1322	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1323
				1324	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1325	INFO("Load Network");
				1326	// Load it into the runtime. It should pass.
				1327	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1328	std::string errorMessage;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1329	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1330	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1331	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1332	INFO("Generate Data");
				1333
				1334	// Creates structures for input & output
				1335	std::vector<float> inputData
				1336	{
				1337	1.0f, 2.0f, 3.0f, 4.0f
				1338	};
				1339	std::vector<float> outputData(4);
				1340	std::vector<float> expectedOutput
				1341	{
				1342	1.0f, 4.0f, 9.0f, 16.0f
				1343	};
				1344
				1345	// Check our input and output pointers are actually aligned
				1346	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1347	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1348	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1349
				1350	INFO("Create Inference");
				1351	InputTensors inputTensors
				1352	{
				1353	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1354	};
				1355	OutputTensors outputTensors
				1356	{
				1357	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1358	};
				1359
				1360	runtime->GetProfiler(netId)->EnableProfiling(true);
				1361	std::vector<ImportedInputId> importedInputIds =
				1362	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1363	CHECK(importedInputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1364	std::vector<ImportedOutputId> importedOutputIds =
				1365	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1366	CHECK(importedOutputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1367	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1368	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1369
				1370	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1371	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1372	std::stringstream ss;
				1373	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1374	std::string dump = ss.str();
				1375
				1376	if (backends[0] == Compute::CpuAcc)
				1377	{
				1378	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1379	// reconfigure is implemented
				1380	int count = SubStringCounter(dump, "SyncMemGeneric");
				1381	CHECK(count == 0);
				1382	// Should be 2 CopyMemGeneric workloads
				1383	count = SubStringCounter(dump, "CopyMemGeneric");
				1384	CHECK(count >= 1);
				1385	}
				1386	else
				1387	{
				1388	// Check there is at least 1 SyncMemGeneric workload as we exported
				1389	int count = SubStringCounter(dump, "SyncMemGeneric");
				1390	CHECK(count >= 1);
				1391	// Shouldn't be any CopyMemGeneric workloads
				1392	count = SubStringCounter(dump, "CopyMemGeneric");
				1393	CHECK(count == 0);
				1394	}
				1395	// Check the output is correct
				1396	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1397
				1398	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1399	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1400	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1401	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1402
				1403	// Check if our pointer is truly misaligned
				1404	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1405
				1406	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1407	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1408	2.0f, 3.0f, 4.0f, 5.0f
				1409	};
				1410
				1411	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1412
				1413	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1414	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1415
				1416	// Check if our pointer is truly misaligned
				1417	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1418
				1419	std::vector<float> expectedMisalignedOutput
				1420	{
				1421	4.0f, 9.0f, 16.0f, 25.0f
				1422	};
				1423
				1424	INFO("Create Second Inference");
				1425	InputTensors inputTensorsMisaligned
				1426	{
				1427	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1428	};
				1429	OutputTensors outputTensorsMisaligned
				1430	{
				1431	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1432	};
				1433	importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1434	// Import should fail.
				1435	CHECK(importedInputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1436	importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1437	// Import should fail.
				1438	CHECK(importedOutputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1439
				1440	// Do the inference and force the import as the memory is misaligned.
				1441	runtime->EnqueueWorkload(netId,
				1442	inputTensorsMisaligned,
				1443	outputTensorsMisaligned,
				1444	importedInputIds,
				1445	importedOutputIds);
				1446
				1447	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1448	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1449	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1450	dump = ss.str();
				1451
				1452	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1453	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1454	// for imports/copies. Only that the output is correct.
				1455	if (backends[0] != Compute::GpuAcc)
				1456	{
				1457	// The SyncMemGeneric will still be in the profiling log from the first inference
				1458	int count = SubStringCounter(dump, "SyncMemGeneric");
				1459	CHECK(count >= 1);
				1460	// We should now see CopyMemGeneric workloads as we copied all buffers
				1461	count = SubStringCounter(dump, "CopyMemGeneric");
				1462	CHECK(count >= 1);
				1463	}
				1464	// Check the output is correct
				1465	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1466	std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1467	std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1468	for (auto outputValue : expectedMisalignedOutput)
				1469	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1470	CHECK(outputValue == alignedOutputData[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1471	++index;
				1472	}
				1473	// Clean up to avoid interfering with other tests
				1474	runtime->UnloadNetwork(netId);
				1475	std::free(inputMemPtr);
				1476	std::free(outputMemPtr);
				1477	}
				1478
				1479
				1480	inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
				1481	{
				1482	/**
				1483	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1484	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1485	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1486	* In this we create some misaligned buffers, copy them into a network and validate the output and number of
				1487	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
				1488	* to importing correctly.
				1489	*/
				1490	using namespace armnn;
				1491
				1492	IRuntime::CreationOptions options;
				1493	IRuntimePtr runtime(IRuntime::Create(options));
				1494
				1495	// Builds up the structure of the network.
				1496	INetworkPtr net(INetwork::Create());
				1497	IConnectableLayer* input = net->AddInputLayer(0);
				1498
				1499	ActivationDescriptor descriptor;
				1500	descriptor.m_Function = ActivationFunction::Square;
				1501	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1502
				1503	IConnectableLayer* output = net->AddOutputLayer(0);
				1504
				1505	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1506	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1507	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1508	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1509
				1510	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1511	INFO("Load Network");
				1512	// Load it into the runtime. It should pass.
				1513	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1514	std::string errorMessage;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1515	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame]	1516	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1517	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1518	INFO("Generate Data");
				1519
				1520	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1521	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1522	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1523	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1524
				1525	// Check if our pointer is truly misaligned
				1526	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1527	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1528	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1529	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1530	2.0f, 3.0f, 4.0f, 5.0f
				1531	};
				1532	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1533
				1534	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1535	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1536
				1537	// Check if our pointer is truly misaligned
				1538	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1539
				1540	std::vector<float> expectedMisalignedOutput
				1541	{
				1542	4.0f, 9.0f, 16.0f, 25.0f
				1543	};
				1544
				1545	INFO("Create Second Inference");
				1546	InputTensors inputTensorsMisaligned
				1547	{
				1548	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1549	};
				1550	OutputTensors outputTensorsMisaligned
				1551	{
				1552	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1553	};
				1554	runtime->GetProfiler(netId)->EnableProfiling(true);
				1555	std::vector<ImportedInputId> importedInputIds =
				1556	runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1557	// Import should fail.
				1558	CHECK(importedInputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1559	std::vector<ImportedOutputId> importedOutputIds =
				1560	runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1561	// Import should fail.
				1562	CHECK(importedOutputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1563
				1564	// Do the inference and force the import as the memory is misaligned.
				1565	runtime->EnqueueWorkload(netId,
				1566	inputTensorsMisaligned,
				1567	outputTensorsMisaligned,
				1568	importedInputIds,
				1569	importedOutputIds);
				1570
				1571	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1572	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1573	std::stringstream ss;
				1574	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1575	std::string dump = ss.str();
				1576
				1577	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1578	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1579	// for imports/copies. Only that the output is correct.
				1580	if (backends[0] != Compute::GpuAcc)
				1581	{
				1582	// We can only copy so there should be no SyncMemGeneric
				1583	int count = SubStringCounter(dump, "SyncMemGeneric");
				1584	CHECK(count == 0);
				1585	// Should only be CopyMemGeneric workloads as we copied all buffers
				1586	count = SubStringCounter(dump, "CopyMemGeneric");
				1587	CHECK(count >= 1);
				1588	}
				1589	// Check the output is correct
				1590	unsigned int index = 0;
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1591	std::vector<float> alignedOutput(expectedMisalignedOutput.size());
				1592	std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1593	for (auto outputValue : expectedMisalignedOutput)
				1594	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1595	CHECK(outputValue == alignedOutput[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1596	++index;
				1597	}
				1598	std::free(inputMemPtr);
				1599	std::free(outputMemPtr);
				1600
				1601	// Creates structures for input & output
				1602	std::vector<float> inputData
				1603	{
				1604	1.0f, 2.0f, 3.0f, 4.0f
				1605	};
				1606	std::vector<float> outputData(4);
				1607	std::vector<float> expectedOutput
				1608	{
				1609	1.0f, 4.0f, 9.0f, 16.0f
				1610	};
				1611
				1612	// Check our input and output pointers are actually aligned
				1613	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1614	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1615
				1616	INFO("Create Inference");
				1617	InputTensors inputTensors
				1618	{
				1619	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1620	};
				1621	OutputTensors outputTensors
				1622	{
				1623	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1624	};
				1625
				1626	importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1627	CHECK(importedInputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1628	importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1629	CHECK(importedOutputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1630	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1631	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1632
				1633	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1634	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1635	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1636	dump = ss.str();
				1637
				1638	if (backends[0] == Compute::CpuAcc)
				1639	{
				1640	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1641	// reconfigure is implemented
				1642	int count = SubStringCounter(dump, "SyncMemGeneric");
				1643	CHECK(count == 0);
				1644	// Should be 2 CopyMemGeneric workloads
				1645	count = SubStringCounter(dump, "CopyMemGeneric");
				1646	CHECK(count >= 1);
				1647	}
				1648	else
				1649	{
				1650	// Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
				1651	// SyncMemGeneric Workload when we previously didn't
				1652	int count = SubStringCounter(dump, "SyncMemGeneric");
				1653	CHECK(count >= 1);
				1654	// Should still be some CopyMemGeneric Workloads from the last inference
				1655	count = SubStringCounter(dump, "CopyMemGeneric");
				1656	CHECK(count >= 1);
				1657	}
				1658	// Check the output is correct
				1659	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1660	// Clean up to avoid interfering with other tests
				1661	runtime->UnloadNetwork(netId);
				1662	}
				1663
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	1664	} // anonymous namespace