Blame - src/backends/backendsCommon/test/EndToEndTestImpl.hpp - ml/armnn

blob: 795fc13c3224ab84b4ba78b56c1323e3308d3f9a [file] [log] [blame]

Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	1	//
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	2	// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	3	// SPDX-License-Identifier: MIT
				4	//
				5	#pragma once
				6
Sadik Armagan	a097d2a	2021-11-24 15:47:28 +0000	[diff] [blame]	7	#include <CommonTestUtils.hpp>
Mike Kelly	386ff1a	2021-03-29 15:04:50 +0100	[diff] [blame]	8
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	9	#include <armnn/Descriptors.hpp>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	10	#include <armnn/INetwork.hpp>
Matthew Bentham	246bd46	2020-01-20 16:16:06 +0000	[diff] [blame]	11	#include <armnn/IRuntime.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	12
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	13	#include <Profiling.hpp>
Colm Donelan	c42a987	2022-02-02 16:35:09 +0000	[diff] [blame]	14	#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	15	#include <ResolveType.hpp>
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	16
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	17	#include <doctest/doctest.h>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	18
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	19	#include <vector>
				20
				21	namespace
				22	{
				23
				24	using namespace armnn;
				25
				26	template<typename T>
				27	bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
				28	const TensorInfo& commonTensorInfo,
				29	const std::vector<T>& inputData,
				30	const std::vector<T>& constantData,
				31	const std::vector<T>& expectedOutputData)
				32	{
				33	// Create runtime in which test will run
				34	IRuntime::CreationOptions options;
				35	IRuntimePtr runtime(IRuntime::Create(options));
				36
				37	// Builds up the structure of the network.
				38	INetworkPtr net(INetwork::Create());
				39
				40	IConnectableLayer* input = net->AddInputLayer(0);
				41	IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
				42	IConnectableLayer* add = net->AddAdditionLayer();
				43	IConnectableLayer* output = net->AddOutputLayer(0);
				44
				45	input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
				46	constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
				47	add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				48
				49	// Sets the tensors in the network.
				50	input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				51	constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				52	add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
				53
				54	// optimize the network
				55	IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
				56
				57	// Loads it into the runtime.
				58	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	59	std::string errorMessage;
				60	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
				61	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	62
				63	// Creates structures for input & output.
				64	std::vector<T> outputData(inputData.size());
				65
				66	InputTensors inputTensors
				67	{
				68	{0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
				69	};
				70	OutputTensors outputTensors
				71	{
				72	{0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				73	};
				74
				75	// Does the inference.
				76	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				77
				78	// Checks the results.
				79	return outputData == expectedOutputData;
				80	}
				81
				82	inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
				83	{
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	84	TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
				85	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	86
				87	return ConstantUsageTest(backends,
				88	commonTensorInfo,
				89	std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
				90	std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
				91	std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
				92	);
				93	}
				94
				95	inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
				96	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	97	TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	98
				99	const float scale = 0.023529f;
				100	const int8_t offset = -43;
				101
				102	commonTensorInfo.SetQuantizationScale(scale);
				103	commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	104	commonTensorInfo.SetConstant(true);
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	105
				106	return ConstantUsageTest(backends,
				107	commonTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	108	armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
				109	armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
				110	armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar	7010400	2018-10-24 15:33:28 +0100	[diff] [blame]	111	);
				112	}
				113
// Utility function to find the number of non-overlapping instances of a substring within a string.
//
// string     text to search (not modified)
// substring  pattern to look for
//
// Returns the number of matches found scanning left to right, where each search resumes
// directly after the previous match. An empty substring yields 0: it would match at every
// position without ever advancing the search, so it is rejected up front instead of looping forever.
inline int SubStringCounter(std::string& string, std::string&& substring)
{
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search just past the previous match to avoid finding the same substring twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
				128
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	129	template<DataType ArmnnIType, DataType ArmnnOType,
				130	typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	131	void EndToEndLayerTestImpl(INetworkPtr network,
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	132	const std::map<int, std::vector<TInput>>& inputTensorData,
				133	const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilers	bca73e1	2020-03-11 12:52:46 +0000	[diff] [blame]	134	std::vector<BackendId> backends,
				135	float tolerance = 0.000001f)
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	136	{
				137	// Create runtime in which test will run
				138	IRuntime::CreationOptions options;
				139	IRuntimePtr runtime(IRuntime::Create(options));
				140
				141	// optimize the network
				142	IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
				143
				144	// Loads it into the runtime.
				145	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	146	std::string errorMessage;
				147	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
				148	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	149
				150	InputTensors inputTensors;
				151	inputTensors.reserve(inputTensorData.size());
				152	for (auto&& it : inputTensorData)
				153	{
				154	inputTensors.push_back({it.first,
				155	ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
				156	}
				157	OutputTensors outputTensors;
				158	outputTensors.reserve(expectedOutputData.size());
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	159	std::map<int, std::vector<TOutput>> outputStorage;
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	160	for (auto&& it : expectedOutputData)
				161	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	162	std::vector<TOutput> out(it.second.size());
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	163	outputStorage.emplace(it.first, out);
				164	outputTensors.push_back({it.first,
				165	Tensor(runtime->GetOutputTensorInfo(netId, it.first),
				166	outputStorage.at(it.first).data())});
				167	}
				168
				169	// Does the inference.
				170	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				171
				172	// Checks the results.
				173	for (auto&& it : expectedOutputData)
				174	{
kevmay01	2b4d88e	2019-01-24 14:05:09 +0000	[diff] [blame]	175	std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tar	f97f6da	2019-10-01 18:35:44 +0100	[diff] [blame]	176	for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	177	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	178	CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin	2e3f4d2	2020-07-29 14:29:20 +0100	[diff] [blame]	179	"Actual output: " << out[i] << ". Expected output:" << it.second[i]);
				180
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	181	}
narpra01	b9546cf	2018-11-20 15:21:28 +0000	[diff] [blame]	182	}
				183	}
				184
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	185	inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	186	{
				187	using namespace armnn;
				188
				189	// Create runtime in which test will run
				190	IRuntime::CreationOptions options;
				191	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				192
				193	// build up the structure of the network
				194	INetworkPtr net(INetwork::Create());
				195
				196	IConnectableLayer* input = net->AddInputLayer(0);
				197
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	198	ActivationDescriptor descriptor;
				199	descriptor.m_Function = ActivationFunction::Square;
				200	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	201
				202	IConnectableLayer* output = net->AddOutputLayer(0);
				203
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	204	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				205	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	206
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	207	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	208	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	209
				210	// Optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	211	OptimizerOptions optimizedOptions;
				212	optimizedOptions.m_ImportEnabled = true;
				213	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	214	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	215
				216	// Loads it into the runtime.
				217	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	218	std::string errorMessage;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	219	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	220	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	221	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				222	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	223
				224	// Creates structures for input & output
				225	std::vector<float> inputData
				226	{
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	227	1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	228	};
				229
				230	// Misaligned input
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	231	float* misalignedInputData = reinterpret_cast<float>(reinterpret_cast<char>(inputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	232
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	233	std::vector<float> outputData(4);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	234
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	235	// Aligned output
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	236	float* alignedOutputData = outputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	237
				238	InputTensors inputTensors
				239	{
				240	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
				241	};
				242	OutputTensors outputTensors
				243	{
				244	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
				245	};
				246
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	247	runtime->GetProfiler(netId)->EnableProfiling(true);
				248
				249	// Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	250	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	251	}
				252
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	253	inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	254	{
				255	using namespace armnn;
				256
				257	// Create runtime in which test will run
				258	IRuntime::CreationOptions options;
				259	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				260
				261	// build up the structure of the network
				262	INetworkPtr net(INetwork::Create());
				263
				264	IConnectableLayer* input = net->AddInputLayer(0);
				265
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	266	ActivationDescriptor descriptor;
				267	descriptor.m_Function = ActivationFunction::Square;
				268	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	269
				270	IConnectableLayer* output = net->AddOutputLayer(0);
				271
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	272	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				273	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	274
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	275	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	276	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	277
				278	// Optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	279	OptimizerOptions optimizedOptions;
				280	optimizedOptions.m_ImportEnabled = true;
				281	optimizedOptions.m_ExportEnabled = true;
				282	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	283	CHECK(optNet);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	284
				285	// Loads it into the runtime.
				286	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	287	std::string errorMessage;
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	288	// Enable Importing and Exporting
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	289	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	290	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				291	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	292
				293	// Creates structures for input & output
				294	std::vector<float> inputData
				295	{
				296	1.0f, 2.0f, 3.0f, 4.0f, 5.0f
				297	};
				298
				299	// Aligned input
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	300	float* alignedInputData = inputData.data();
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	301
				302	std::vector<float> outputData(5);
				303
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	304	// Misaligned output
Aron Virginas-Tar	d9f7c8b	2019-09-13 13:37:03 +0100	[diff] [blame]	305	float* misalignedOutputData = reinterpret_cast<float>(reinterpret_cast<char>(outputData.data()) + 1);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	306
				307	InputTensors inputTensors
				308	{
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	309	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	310	};
				311	OutputTensors outputTensors
				312	{
				313	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
				314	};
				315
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	316	// Do the inference and expect it to fail with a ExportMemoryException
				317	if (backends[0] == Compute::CpuAcc)
				318	{
				319	// For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	320	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	321	}
				322	else
				323	{
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	324	CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	325	}
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	326	}
				327
				328	inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
				329	{
				330	using namespace armnn;
				331
				332	// Create runtime in which test will run
				333	IRuntime::CreationOptions options;
				334	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				335
				336	// build up the structure of the network
				337	INetworkPtr net(INetwork::Create());
				338
				339	IConnectableLayer* input = net->AddInputLayer(0);
				340
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	341	ActivationDescriptor descriptor;
				342	descriptor.m_Function = ActivationFunction::Square;
				343	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	344
				345	IConnectableLayer* output = net->AddOutputLayer(0);
				346
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	347	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				348	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	349
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	350	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	351	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	352
				353	// Optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	354	OptimizerOptions optimizedOptions;
				355	optimizedOptions.m_ImportEnabled = true;
				356	optimizedOptions.m_ExportEnabled = true;
				357	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	358	CHECK(optNet);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	359
				360	// Loads it into the runtime.
				361	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	362	std::string errorMessage;
David Monahan	4f1e8e4	2019-09-04 09:22:10 +0100	[diff] [blame]	363	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	364	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	365	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				366	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	367
				368	// Creates structures for input & output
				369	std::vector<float> inputData
				370	{
				371	1.0f, 2.0f, 3.0f, 4.0f
				372	};
				373
				374	std::vector<float> outputData(4);
				375
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	376	std::vector<float> expectedOutput
				377	{
				378	1.0f, 4.0f, 9.0f, 16.0f
				379	};
				380
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	381	InputTensors inputTensors
				382	{
				383	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				384	};
				385	OutputTensors outputTensors
				386	{
				387	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				388	};
				389
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	390	runtime->GetProfiler(netId)->EnableProfiling(true);
				391
				392	// Do the inference
				393	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				394
				395	// Retrieve the Profiler.Print() output to get the workload execution
				396	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				397	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	398	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	399	std::string dump = ss.str();
				400
David Monahan	3fb7e10	2019-08-20 11:25:29 +0100	[diff] [blame]	401	// Contains ActivationWorkload
				402	std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	403	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	404
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	405	// Contains SyncMemGeneric
				406	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	407	CHECK(found != std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	408
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	409	// Does not contain CopyMemGeneric
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	410	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	411	CHECK(found == std::string::npos);
James Conroy	57d10b7	2019-10-25 09:44:14 +0100	[diff] [blame]	412
				413	// Check output is as expected
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	414	CHECK(outputData == expectedOutput);
Ferran Balaguer	dcaa610	2019-08-21 13:28:38 +0100	[diff] [blame]	415	}
				416
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	417	inline void ImportOnlyWorkload(std::vector<BackendId> backends)
				418	{
				419	using namespace armnn;
				420
				421	IRuntime::CreationOptions options;
				422	IRuntimePtr runtime(IRuntime::Create(options));
				423
				424	// Builds up the structure of the network.
				425	INetworkPtr net(INetwork::Create());
				426
				427	IConnectableLayer* input = net->AddInputLayer(0);
				428
				429	ActivationDescriptor descriptor;
				430	descriptor.m_Function = ActivationFunction::Square;
				431	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				432
				433	IConnectableLayer* output = net->AddOutputLayer(0);
				434
				435	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				436	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				437
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	438	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	439	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				440
				441	// optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	442	OptimizerOptions optimizedOptions;
				443	optimizedOptions.m_ImportEnabled = true;
				444	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	445
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	446	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	447	// Load it into the runtime. It should pass.
				448	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	449	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	450	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	451	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				452	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	453
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	454	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	455	// Creates structures for input & output
				456	std::vector<float> inputData
				457	{
				458	1.0f, 2.0f, 3.0f, 4.0f
				459	};
				460
				461	std::vector<float> outputData(4);
				462
				463	std::vector<float> expectedOutput
				464	{
				465	1.0f, 4.0f, 9.0f, 16.0f
				466	};
				467
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	468	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	469
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	470	InputTensors inputTensors
				471	{
				472	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				473	};
				474	OutputTensors outputTensors
				475	{
				476	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				477	};
				478
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	479	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	480	runtime->GetProfiler(netId)->EnableProfiling(true);
				481
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	482	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	483	// Do the inference
				484	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				485
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	486	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	487	// Retrieve the Profiler.Print() output to get the workload execution
				488	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				489	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	490	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	491	std::string dump = ss.str();
				492
				493	// Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	494	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	495	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	496	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	497
				498	// Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	499	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	500	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	501	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	502
				503	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	504	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	505	}
				506
				507	inline void ExportOnlyWorkload(std::vector<BackendId> backends)
				508	{
				509	using namespace armnn;
				510
				511	IRuntime::CreationOptions options;
				512	IRuntimePtr runtime(IRuntime::Create(options));
				513
				514	// Builds up the structure of the network.
				515	INetworkPtr net(INetwork::Create());
				516
				517	IConnectableLayer* input = net->AddInputLayer(0);
				518
				519	ActivationDescriptor descriptor;
				520	descriptor.m_Function = ActivationFunction::Square;
				521	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				522
				523	IConnectableLayer* output = net->AddOutputLayer(0);
				524
				525	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				526	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				527
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	528	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	529	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				530
				531	// optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	532	OptimizerOptions optimizedOptions;
				533	optimizedOptions.m_ExportEnabled = true;
				534	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	535
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	536	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	537	// Load it into the runtime. It should pass.
				538	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	539	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	540	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	541	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				542	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	543
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	544	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	545	// Creates structures for input & output
				546	std::vector<float> inputData
				547	{
				548	1.0f, 2.0f, 3.0f, 4.0f
				549	};
				550
				551	std::vector<float> outputData(4);
				552
				553	std::vector<float> expectedOutput
				554	{
				555	1.0f, 4.0f, 9.0f, 16.0f
				556	};
				557
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	558	INFO("Create Inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	559
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	560	InputTensors inputTensors
				561	{
				562	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				563	};
				564	OutputTensors outputTensors
				565	{
				566	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				567	};
				568
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	569	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	570	runtime->GetProfiler(netId)->EnableProfiling(true);
				571
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	572	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	573	// Do the inference
				574	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				575
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	576	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	577	// Retrieve the Profiler.Print() output to get the workload execution
				578	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				579	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	580	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	581	std::string dump = ss.str();
				582
				583	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	584	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	585	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	586	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	587
				588	// Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	589	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	590	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	591	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	592
				593	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	594	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	595	}
				596
				597	inline void ImportAndExportWorkload(std::vector<BackendId> backends)
				598	{
				599	using namespace armnn;
				600
				601	IRuntime::CreationOptions options;
				602	IRuntimePtr runtime(IRuntime::Create(options));
				603
				604	// Builds up the structure of the network.
				605	INetworkPtr net(INetwork::Create());
				606
				607	IConnectableLayer* input = net->AddInputLayer(0);
				608
				609	ActivationDescriptor descriptor;
				610	descriptor.m_Function = ActivationFunction::Square;
				611	IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
				612
				613	IConnectableLayer* output = net->AddOutputLayer(0);
				614
				615	input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
				616	pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				617
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	618	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	619	pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				620
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	621	OptimizerOptions optimizedOptions;
				622	optimizedOptions.m_ImportEnabled = true;
				623	optimizedOptions.m_ExportEnabled = true;
				624	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	625
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	626	INFO("Load Network");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	627	// Load it into the runtime. It should pass.
				628	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	629	std::string errorMessage;
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	630	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	631	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				632	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	633
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	634	INFO("Generate Data");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	635	// Creates structures for input & output
				636	std::vector<float> inputData
				637	{
				638	1.0f, 2.0f, 3.0f, 4.0f
				639	};
				640
				641	std::vector<float> outputData(4);
				642
				643	std::vector<float> expectedOutput
				644	{
				645	1.0f, 4.0f, 9.0f, 16.0f
				646	};
				647
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	648	INFO("Create inference");
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	649
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	650	InputTensors inputTensors
				651	{
				652	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				653	};
				654	OutputTensors outputTensors
				655	{
				656	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				657	};
				658
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	659	INFO("Get Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	660	runtime->GetProfiler(netId)->EnableProfiling(true);
				661
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	662	INFO("Run Inference");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	663	// Do the inference
				664	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				665
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	666	INFO("Print Profiler");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	667	// Retrieve the Profiler.Print() output to get the workload execution
				668	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				669	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	670	profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	671	std::string dump = ss.str();
				672
				673	// Check there is a SyncMemGeneric workload as we exported
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	674	INFO("Find SyncMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	675	int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	676	CHECK(count == 1);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	677
				678	// Shouldn't be any CopyMemGeneric workloads
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	679	INFO("Find CopyMemGeneric");
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	680	count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	681	CHECK(count == 0);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	682
				683	// Check the output is correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	684	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	685	}
				686
				687	inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
				688	{
				689	using namespace armnn;
				690
				691	// Create runtime in which test will run
				692	IRuntime::CreationOptions options;
				693	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				694
				695	// build up the structure of the network
				696	INetworkPtr net(INetwork::Create());
				697
				698	IConnectableLayer* input = net->AddInputLayer(0);
				699
				700	ActivationDescriptor descriptor;
				701	descriptor.m_Function = ActivationFunction::Square;
				702	IConnectableLayer* activation = net->AddActivationLayer(descriptor);
				703
				704	IConnectableLayer* output0 = net->AddOutputLayer(0);
				705	IConnectableLayer* output1 = net->AddOutputLayer(1);
				706
				707	input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
				708	activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				709	activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
				710
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	711	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	712	activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
				713
				714	// Optimize the network
Francis Murtagh	626bd90	2022-06-21 13:16:23 +0000	[diff] [blame]	715	OptimizerOptions optimizedOptions;
				716	optimizedOptions.m_ImportEnabled = true;
				717	optimizedOptions.m_ExportEnabled = true;
				718	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	719
				720	// Loads it into the runtime.
				721	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	722	std::string errorMessage;
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	723	// Enable Importing
Francis Murtagh	73d3e2e	2021-04-29 14:23:04 +0100	[diff] [blame]	724	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	725	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				726	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	727
				728	// Creates structures for input & output
				729	std::vector<float> inputData
				730	{
				731	1.0f, 2.0f, 3.0f, 4.0f
				732	};
				733
				734	std::vector<float> outputData0(4);
				735	std::vector<float> outputData1(4);
				736
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	737	std::vector<float> expectedOutput
				738	{
				739	1.0f, 4.0f, 9.0f, 16.0f
				740	};
				741
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	742	InputTensors inputTensors
				743	{
				744	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				745	};
				746	OutputTensors outputTensors
				747	{
				748	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
				749	{1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
				750	};
				751
				752	// The result of the inference is not important, just the fact that there
				753	// should not be CopyMemGeneric workloads.
				754	runtime->GetProfiler(netId)->EnableProfiling(true);
				755
				756	// Do the inference
				757	runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
				758
				759	// Retrieve the Profiler.Print() output to get the workload execution
				760	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				761	std::stringstream ss;
				762	profilerManager.GetProfiler()->Print(ss);
				763	std::string dump = ss.str();
				764
				765	std::size_t found = std::string::npos;
				766
				767	if (backends[0] == Compute::CpuRef)
				768	{
				769	found = dump.find("RefActivationWorkload");
				770	}
				771	else if (backends[0] == Compute::CpuAcc)
				772	{
				773	found = dump.find("NeonActivationWorkload");
				774	}
				775	else if (backends[0] == Compute::GpuAcc)
				776	{
				777	found = dump.find("ClActivationWorkload");
				778	}
				779
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	780	CHECK(found != std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	781	// No contains SyncMemGeneric
				782	found = dump.find("SyncMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	783	CHECK(found == std::string::npos);
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	784	// Contains CopyMemGeneric
				785	found = dump.find("CopyMemGeneric");
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	786	CHECK(found != std::string::npos);
Narumol Prangnawarat	3b90af6	2020-06-26 11:00:21 +0100	[diff] [blame]	787
				788	// Check that the outputs are correct
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	789	CHECK(std::equal(outputData0.begin(), outputData0.end(),
				790	expectedOutput.begin(), expectedOutput.end()));
				791	CHECK(std::equal(outputData1.begin(), outputData1.end(),
				792	expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer	83239f9	2019-09-19 11:49:25 +0100	[diff] [blame]	793	}
				794
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	795	inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
				796	{
				797	using namespace armnn;
				798
				799	// Create runtime in which test will run
				800	IRuntime::CreationOptions options;
				801	IRuntimePtr runtime(armnn::IRuntime::Create(options));
				802
				803	// build up the structure of the network
				804	INetworkPtr net(INetwork::Create());
				805
				806	IConnectableLayer* input = net->AddInputLayer(0);
				807
				808	// Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
				809	// dim of the output to make it too small to hold the specified slice.
				810	StridedSliceDescriptor descriptor;
				811	descriptor.m_Begin = {0, 0};
				812	descriptor.m_End = {2, 3};
				813	descriptor.m_Stride = {1, 1};
				814	descriptor.m_BeginMask = 0;
				815	descriptor.m_EndMask = 0;
				816	descriptor.m_ShrinkAxisMask = 1;
				817	IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
				818
				819	IConnectableLayer* output0 = net->AddOutputLayer(0);
				820
				821	input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
				822	stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
				823
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	824	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	825	stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
				826
				827	// Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan	1625efc	2021-06-10 18:24:34 +0100	[diff] [blame]	828	CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan	0a99a14	2020-03-13 07:52:54 +0000	[diff] [blame]	829	}
				830
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	831	inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
				832	{
				833	/**
				834	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				835	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				836	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				837	* In this case all inputs and outputs should be imported
				838	*/
				839	using namespace armnn;
				840	IRuntime::CreationOptions options;
				841	IRuntimePtr runtime(IRuntime::Create(options));
				842
				843	// Builds up the structure of the network.
				844	INetworkPtr net(INetwork::Create());
				845	IConnectableLayer* input = net->AddInputLayer(0);
				846	ActivationDescriptor descriptor;
				847	descriptor.m_Function = ActivationFunction::Square;
				848	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				849	IConnectableLayer* output = net->AddOutputLayer(0);
				850	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				851	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				852	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				853	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				854	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				855	INFO("Load Network");
				856
				857	// Load it into the runtime. It should pass.
				858	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	859	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	860	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	861	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				862	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				863
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	864	INFO("Generate Data");
				865
				866	// Creates structures for input & output
				867	std::vector<float> inputData
				868	{
				869	1.0f, 2.0f, 3.0f, 4.0f
				870	};
				871	std::vector<float> outputData(4);
				872	std::vector<float> expectedOutput
				873	{
				874	1.0f, 4.0f, 9.0f, 16.0f
				875	};
				876
				877	// Check our input and output pointers are actually aligned
				878	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				879	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				880	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				881
				882	INFO("Create Inference");
				883	InputTensors inputTensors
				884	{
				885	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				886	};
				887	OutputTensors outputTensors
				888	{
				889	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				890	};
				891
				892	runtime->GetProfiler(netId)->EnableProfiling(true);
				893	std::vector<ImportedInputId> importedInputIds =
				894	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	895	CHECK(importedInputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	896	std::vector<ImportedOutputId> importedOutputIds =
				897	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	898	CHECK(importedOutputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	899	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	900	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	901
				902	// Retrieve the Profiler.Print() output to get the workload execution
				903	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				904	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	905	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	906	std::string dump = ss.str();
				907
				908	if (backends[0] == Compute::CpuAcc)
				909	{
				910	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				911	// reconfigure is implemented
				912	int count = SubStringCounter(dump, "SyncMemGeneric");
				913	CHECK(count == 0);
				914	// Should be 2 CopyMemGeneric workloads
				915	count = SubStringCounter(dump, "CopyMemGeneric");
				916	CHECK(count == 2);
				917	}
				918	else
				919	{
				920	// Check there is a SyncMemGeneric workload as we exported
				921	int count = SubStringCounter(dump, "SyncMemGeneric");
				922	CHECK(count == 1);
				923	// Shouldn't be any CopyMemGeneric workloads
				924	count = SubStringCounter(dump, "CopyMemGeneric");
				925	CHECK(count == 0);
				926	}
				927	// Check the output is correct
				928	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				929	}
				930
				931	inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
				932	{
				933	/**
				934	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				935	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				936	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				937	* In this case all only the output should be imported
				938	*/
				939	using namespace armnn;
				940
				941	IRuntime::CreationOptions options;
				942	IRuntimePtr runtime(IRuntime::Create(options));
				943
				944	// Builds up the structure of the network.
				945	INetworkPtr net(INetwork::Create());
				946	IConnectableLayer* input = net->AddInputLayer(0);
				947
				948	ActivationDescriptor descriptor;
				949	descriptor.m_Function = ActivationFunction::Square;
				950	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				951
				952	IConnectableLayer* output = net->AddOutputLayer(0);
				953
				954	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				955	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				956	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				957	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				958
				959	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				960	INFO("Load Network");
				961	// Load it into the runtime. It should pass.
				962	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	963	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	964	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	965	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				966	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				967
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	968	INFO("Generate Data");
				969
				970	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				971	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				972	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				973
				974	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				975
				976	// Check if our pointer is truly misaligned
				977	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				978	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				979
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	980	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	981	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	982	1.0f, 2.0f, 3.0f, 4.0f
				983	};
				984
				985	std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	986
				987	std::vector<float> outputData(4);
				988	// Check our output buffer is aligned
				989	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				990
				991	std::vector<float> expectedOutput
				992	{
				993	1.0f, 4.0f, 9.0f, 16.0f
				994	};
				995
				996	INFO("Create Inference");
				997	InputTensors inputTensors
				998	{
				999	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
				1000	};
				1001	OutputTensors outputTensors
				1002	{
				1003	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1004	};
				1005	runtime->GetProfiler(netId)->EnableProfiling(true);
				1006	std::vector<ImportedInputId> importedInputIds =
				1007	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1008	// We expect the import to have failed.
				1009	CHECK(importedInputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1010	std::vector<ImportedOutputId> importedOutputIds =
				1011	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1012	CHECK(importedOutputIds.size() == 1);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1013
				1014	// Do the inference and force the import as the memory is misaligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1015	runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1016
				1017	// Retrieve the Profiler.Print() output to get the workload execution
				1018	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1019	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1020	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1021	std::string dump = ss.str();
				1022
				1023	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1024	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1025	// for imports/copies. Only that the output is correct.
				1026	if (backends[0] != Compute::GpuAcc)
				1027	{
				1028	if (backends[0] == Compute::CpuAcc)
				1029	{
				1030	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1031	// reconfigure is implemented
				1032	// We should get 0 SyncMemGeneric for the Output
				1033	int count = SubStringCounter(dump, "SyncMemGeneric");
				1034	CHECK(count == 0);
				1035	// Should be 2 CopyMemGeneric as we copied the input
				1036	count = SubStringCounter(dump, "CopyMemGeneric");
				1037	CHECK(count == 2);
				1038	}
				1039	else
				1040	{
				1041	// We should get 1 SyncMemGeneric for the Output
				1042	int count = SubStringCounter(dump, "SyncMemGeneric");
				1043	CHECK(count == 1);
				1044	// Should only be 1 CopyMemGeneric as we copied the input
				1045	count = SubStringCounter(dump, "CopyMemGeneric");
				1046	CHECK(count == 1);
				1047	}
				1048	}
				1049	// Check the output is correct
				1050	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1051	std::free(memPtr);
				1052	}
				1053
				1054	inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1055	{
				1056	/**
				1057	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1058	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1059	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1060	* In this case all only the input should be imported
				1061	*/
				1062	using namespace armnn;
				1063
				1064	IRuntime::CreationOptions options;
				1065	IRuntimePtr runtime(IRuntime::Create(options));
				1066
				1067	// Builds up the structure of the network.
				1068	INetworkPtr net(INetwork::Create());
				1069	IConnectableLayer* input = net->AddInputLayer(0);
				1070
				1071	ActivationDescriptor descriptor;
				1072	descriptor.m_Function = ActivationFunction::Square;
				1073	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1074
				1075	IConnectableLayer* output = net->AddOutputLayer(0);
				1076
				1077	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1078	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1079	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1080	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1081
				1082	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1083	INFO("Load Network");
				1084	// Load it into the runtime. It should pass.
				1085	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1086	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1087	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1088	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1089	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
				1090
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1091	INFO("Generate Data");
				1092
				1093	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1094	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1095	auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1096
				1097	float* misalignedMemPtr = reinterpret_cast<float>(reinterpret_cast<char>(memPtr) + 1);
				1098
				1099	// Check if our pointer is truly misaligned
				1100	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1101	CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
				1102
				1103	// Creates structures for input & output
				1104	std::vector<float> inputData
				1105	{
				1106	1.0f, 2.0f, 3.0f, 4.0f
				1107	};
				1108
				1109	// Check our input buffer is aligned
				1110	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1111	std::vector<float> expectedOutput
				1112	{
				1113	1.0f, 4.0f, 9.0f, 16.0f
				1114	};
				1115
				1116	INFO("Create Inference");
				1117	InputTensors inputTensors
				1118	{
				1119	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1120	};
				1121	OutputTensors outputTensors
				1122	{
				1123	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
				1124	};
				1125	runtime->GetProfiler(netId)->EnableProfiling(true);
				1126	std::vector<ImportedInputId> importedInputIds =
				1127	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1128	CHECK(importedInputIds.size() == 1);
				1129	// We expect this to fail.
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1130	std::vector<ImportedOutputId> importedOutputIds =
				1131	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1132	CHECK(importedOutputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1133
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1134	// Even if importing the output failed we still expect to be able to get it to work.
				1135	runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1136
				1137	// Retrieve the Profiler.Print() output to get the workload execution
				1138	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1139	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1140	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1141	std::string dump = ss.str();
				1142
				1143	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1144	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1145	// for imports/copies. Only that the output is correct.
				1146	if (backends[0] != Compute::GpuAcc)
				1147	{
				1148	// Even though we Imported the Input we still shouldn't have a SyncMemGeneric
				1149	int count = SubStringCounter(dump, "SyncMemGeneric");
				1150	CHECK(count == 0);
				1151	// Should only be 1 CopyMemGeneric as we copied the input
				1152	count = SubStringCounter(dump, "CopyMemGeneric");
				1153	if (backends[0] == Compute::CpuAcc)
				1154	{
				1155	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1156	// reconfigure is implemented
				1157	CHECK(count == 2);
				1158	}
				1159	else
				1160	{
				1161	CHECK(count == 1);
				1162	}
				1163	// Check the output is correct
				1164	}
				1165	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1166	std::vector<float> outputData(expectedOutput.size(), 0);
				1167	std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1168	for (auto outputValue : expectedOutput)
				1169	{
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1170	CHECK(outputValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1171	++index;
				1172	}
				1173	std::free(memPtr);
				1174	}
				1175
				1176	inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
				1177	{
				1178	/**
				1179	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1180	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1181	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1182	* In this case all inputs and outputs should be copied
				1183	*/
				1184	using namespace armnn;
				1185
				1186	IRuntime::CreationOptions options;
				1187	IRuntimePtr runtime(IRuntime::Create(options));
				1188
				1189	// Builds up the structure of the network.
				1190	INetworkPtr net(INetwork::Create());
				1191	IConnectableLayer* input = net->AddInputLayer(0);
				1192
				1193	ActivationDescriptor descriptor;
				1194	descriptor.m_Function = ActivationFunction::Square;
				1195	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1196
				1197	IConnectableLayer* output = net->AddOutputLayer(0);
				1198
				1199	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1200	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1201	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1202	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1203
				1204	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1205	INFO("Load Network");
				1206	// Load it into the runtime. It should pass.
				1207	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1208	std::string errorMessage;
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1209	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1210	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1211	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1212	INFO("Generate Data");
				1213
				1214	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1215	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1216	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1217	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1218
				1219	// Check if our pointer is truly misaligned
				1220	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1221	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1222	std::vector<float> inputData
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1223	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1224	1.0f, 2.0f, 3.0f, 4.0f
				1225	};
				1226	std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1227
				1228	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1229	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1230
				1231	// Check if our pointer is truly misaligned
				1232	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1233
				1234	std::vector<float> expectedOutput
				1235	{
				1236	1.0f, 4.0f, 9.0f, 16.0f
				1237	};
				1238
				1239	INFO("Create Inference");
				1240	InputTensors inputTensors
				1241	{
				1242	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1243	};
				1244	OutputTensors outputTensors
				1245	{
				1246	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1247	};
				1248	runtime->GetProfiler(netId)->EnableProfiling(true);
				1249	std::vector<ImportedInputId> importedInputIds =
				1250	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1251	// Import should have failed.
				1252	CHECK(importedInputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1253	std::vector<ImportedOutputId> importedOutputIds =
				1254	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1255	// Import should have failed.
				1256	CHECK(importedOutputIds.size() == 0);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1257
				1258	// Do the inference and force the import as the memory is misaligned.
				1259	runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
				1260
				1261	// Retrieve the Profiler.Print() output to get the workload execution
				1262	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1263	std::stringstream ss;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1264	profilerManager.GetProfiler()->Print(ss);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1265	std::string dump = ss.str();
				1266
				1267	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1268	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1269	// for imports/copies. Only that the output is correct.
				1270	if (backends[0] != Compute::GpuAcc)
				1271	{
				1272	// We can only copy so there should be no SyncMemGeneric
				1273	int count = SubStringCounter(dump, "SyncMemGeneric");
				1274	CHECK(count == 0);
				1275	// Should only be CopyMemGeneric workloads as we copied all buffers
				1276	count = SubStringCounter(dump, "CopyMemGeneric");
				1277	CHECK(count == 2);
				1278	}
				1279	// Check the output is correct
				1280	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1281	std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1282	std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
				1283	for (auto expectedValue : expectedOutput)
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1284	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1285	CHECK(expectedValue == outputData[index]);
David Monahan	646bc8a	2022-01-31 14:29:14 +0000	[diff] [blame]	1286	++index;
				1287	}
				1288	std::free(inputMemPtr);
				1289	std::free(outputMemPtr);
				1290	}
				1291
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1292	inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
				1293	{
				1294	/**
				1295	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1296	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1297	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1298	* In this we create some aligned buffers, import them into a network and validate the output and number of
				1299	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
				1300	* back to copying correctly.
				1301	*/
				1302	using namespace armnn;
				1303
				1304	IRuntime::CreationOptions options;
				1305	IRuntimePtr runtime(IRuntime::Create(options));
				1306
				1307	// Builds up the structure of the network.
				1308	INetworkPtr net(INetwork::Create());
				1309	IConnectableLayer* input = net->AddInputLayer(0);
				1310
				1311	ActivationDescriptor descriptor;
				1312	descriptor.m_Function = ActivationFunction::Square;
				1313	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1314
				1315	IConnectableLayer* output = net->AddOutputLayer(0);
				1316
				1317	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1318	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1319	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1320	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1321
				1322	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1323	INFO("Load Network");
				1324	// Load it into the runtime. It should pass.
				1325	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1326	std::string errorMessage;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1327	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1328	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1329	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1330	INFO("Generate Data");
				1331
				1332	// Creates structures for input & output
				1333	std::vector<float> inputData
				1334	{
				1335	1.0f, 2.0f, 3.0f, 4.0f
				1336	};
				1337	std::vector<float> outputData(4);
				1338	std::vector<float> expectedOutput
				1339	{
				1340	1.0f, 4.0f, 9.0f, 16.0f
				1341	};
				1342
				1343	// Check our input and output pointers are actually aligned
				1344	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1345	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1346	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1347
				1348	INFO("Create Inference");
				1349	InputTensors inputTensors
				1350	{
				1351	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1352	};
				1353	OutputTensors outputTensors
				1354	{
				1355	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1356	};
				1357
				1358	runtime->GetProfiler(netId)->EnableProfiling(true);
				1359	std::vector<ImportedInputId> importedInputIds =
				1360	runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1361	CHECK(importedInputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1362	std::vector<ImportedOutputId> importedOutputIds =
				1363	runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1364	CHECK(importedOutputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1365	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1366	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1367
				1368	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1369	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1370	std::stringstream ss;
				1371	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1372	std::string dump = ss.str();
				1373
				1374	if (backends[0] == Compute::CpuAcc)
				1375	{
				1376	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1377	// reconfigure is implemented
				1378	int count = SubStringCounter(dump, "SyncMemGeneric");
				1379	CHECK(count == 0);
				1380	// Should be 2 CopyMemGeneric workloads
				1381	count = SubStringCounter(dump, "CopyMemGeneric");
				1382	CHECK(count >= 1);
				1383	}
				1384	else
				1385	{
				1386	// Check there is at least 1 SyncMemGeneric workload as we exported
				1387	int count = SubStringCounter(dump, "SyncMemGeneric");
				1388	CHECK(count >= 1);
				1389	// Shouldn't be any CopyMemGeneric workloads
				1390	count = SubStringCounter(dump, "CopyMemGeneric");
				1391	CHECK(count == 0);
				1392	}
				1393	// Check the output is correct
				1394	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1395
				1396	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1397	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1398	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1399	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1400
				1401	// Check if our pointer is truly misaligned
				1402	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1403
				1404	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1405	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1406	2.0f, 3.0f, 4.0f, 5.0f
				1407	};
				1408
				1409	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1410
				1411	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1412	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1413
				1414	// Check if our pointer is truly misaligned
				1415	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1416
				1417	std::vector<float> expectedMisalignedOutput
				1418	{
				1419	4.0f, 9.0f, 16.0f, 25.0f
				1420	};
				1421
				1422	INFO("Create Second Inference");
				1423	InputTensors inputTensorsMisaligned
				1424	{
				1425	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1426	};
				1427	OutputTensors outputTensorsMisaligned
				1428	{
				1429	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1430	};
				1431	importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1432	// Import should fail.
				1433	CHECK(importedInputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1434	importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1435	// Import should fail.
				1436	CHECK(importedOutputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1437
				1438	// Do the inference and force the import as the memory is misaligned.
				1439	runtime->EnqueueWorkload(netId,
				1440	inputTensorsMisaligned,
				1441	outputTensorsMisaligned,
				1442	importedInputIds,
				1443	importedOutputIds);
				1444
				1445	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1446	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1447	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1448	dump = ss.str();
				1449
				1450	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1451	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1452	// for imports/copies. Only that the output is correct.
				1453	if (backends[0] != Compute::GpuAcc)
				1454	{
				1455	// The SyncMemGeneric will still be in the profiling log from the first inference
				1456	int count = SubStringCounter(dump, "SyncMemGeneric");
				1457	CHECK(count >= 1);
				1458	// We should now see CopyMemGeneric workloads as we copied all buffers
				1459	count = SubStringCounter(dump, "CopyMemGeneric");
				1460	CHECK(count >= 1);
				1461	}
				1462	// Check the output is correct
				1463	unsigned int index = 0;
David Monahan	eef6b76	2022-02-10 16:01:58 +0000	[diff] [blame]	1464	std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1465	std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1466	for (auto outputValue : expectedMisalignedOutput)
				1467	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1468	CHECK(outputValue == alignedOutputData[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1469	++index;
				1470	}
				1471	// Clean up to avoid interfering with other tests
				1472	runtime->UnloadNetwork(netId);
				1473	std::free(inputMemPtr);
				1474	std::free(outputMemPtr);
				1475	}
				1476
				1477
				1478	inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
				1479	{
				1480	/**
				1481	* This test is similar to the Import tests above, we create a network with a square function and pass in a vector
				1482	* with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
				1483	* tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
				1484	* In this we create some misaligned buffers, copy them into a network and validate the output and number of
				1485	* SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
				1486	* to importing correctly.
				1487	*/
				1488	using namespace armnn;
				1489
				1490	IRuntime::CreationOptions options;
				1491	IRuntimePtr runtime(IRuntime::Create(options));
				1492
				1493	// Builds up the structure of the network.
				1494	INetworkPtr net(INetwork::Create());
				1495	IConnectableLayer* input = net->AddInputLayer(0);
				1496
				1497	ActivationDescriptor descriptor;
				1498	descriptor.m_Function = ActivationFunction::Square;
				1499	IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
				1500
				1501	IConnectableLayer* output = net->AddOutputLayer(0);
				1502
				1503	input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
				1504	activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
				1505	input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
				1506	activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
				1507
				1508	IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
				1509	INFO("Load Network");
				1510	// Load it into the runtime. It should pass.
				1511	NetworkId netId;
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1512	std::string errorMessage;
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1513	INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlin	df15c4e	2023-02-21 15:16:09 +0000	[diff] [blame^]	1514	armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
				1515	CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1516	INFO("Generate Data");
				1517
				1518	// This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
				1519	// this will guarantee that the resultant buffer is misaligned and thus should always be copied.
				1520	auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1521	float* misalignedInputPtr = reinterpret_cast<float>(reinterpret_cast<char>(inputMemPtr) + 1);
				1522
				1523	// Check if our pointer is truly misaligned
				1524	uintptr_t alignment = GetDataTypeSize(DataType::Float32);
				1525	CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1526	std::vector<float> inputValues
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1527	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1528	2.0f, 3.0f, 4.0f, 5.0f
				1529	};
				1530	std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1531
				1532	auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
				1533	float* misalignedOutputPtr = reinterpret_cast<float>(reinterpret_cast<char>(outputMemPtr) + 1);
				1534
				1535	// Check if our pointer is truly misaligned
				1536	CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
				1537
				1538	std::vector<float> expectedMisalignedOutput
				1539	{
				1540	4.0f, 9.0f, 16.0f, 25.0f
				1541	};
				1542
				1543	INFO("Create Second Inference");
				1544	InputTensors inputTensorsMisaligned
				1545	{
				1546	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
				1547	};
				1548	OutputTensors outputTensorsMisaligned
				1549	{
				1550	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
				1551	};
				1552	runtime->GetProfiler(netId)->EnableProfiling(true);
				1553	std::vector<ImportedInputId> importedInputIds =
				1554	runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1555	// Import should fail.
				1556	CHECK(importedInputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1557	std::vector<ImportedOutputId> importedOutputIds =
				1558	runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1559	// Import should fail.
				1560	CHECK(importedOutputIds.size() == 0);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1561
				1562	// Do the inference and force the import as the memory is misaligned.
				1563	runtime->EnqueueWorkload(netId,
				1564	inputTensorsMisaligned,
				1565	outputTensorsMisaligned,
				1566	importedInputIds,
				1567	importedOutputIds);
				1568
				1569	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1570	ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
				1571	std::stringstream ss;
				1572	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1573	std::string dump = ss.str();
				1574
				1575	// GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
				1576	// new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
				1577	// for imports/copies. Only that the output is correct.
				1578	if (backends[0] != Compute::GpuAcc)
				1579	{
				1580	// We can only copy so there should be no SyncMemGeneric
				1581	int count = SubStringCounter(dump, "SyncMemGeneric");
				1582	CHECK(count == 0);
				1583	// Should only be CopyMemGeneric workloads as we copied all buffers
				1584	count = SubStringCounter(dump, "CopyMemGeneric");
				1585	CHECK(count >= 1);
				1586	}
				1587	// Check the output is correct
				1588	unsigned int index = 0;
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1589	std::vector<float> alignedOutput(expectedMisalignedOutput.size());
				1590	std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1591	for (auto outputValue : expectedMisalignedOutput)
				1592	{
Matthew Bentham	c92bbd7	2022-02-10 11:12:34 +0000	[diff] [blame]	1593	CHECK(outputValue == alignedOutput[index]);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1594	++index;
				1595	}
				1596	std::free(inputMemPtr);
				1597	std::free(outputMemPtr);
				1598
				1599	// Creates structures for input & output
				1600	std::vector<float> inputData
				1601	{
				1602	1.0f, 2.0f, 3.0f, 4.0f
				1603	};
				1604	std::vector<float> outputData(4);
				1605	std::vector<float> expectedOutput
				1606	{
				1607	1.0f, 4.0f, 9.0f, 16.0f
				1608	};
				1609
				1610	// Check our input and output pointers are actually aligned
				1611	CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
				1612	CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
				1613
				1614	INFO("Create Inference");
				1615	InputTensors inputTensors
				1616	{
				1617	{0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
				1618	};
				1619	OutputTensors outputTensors
				1620	{
				1621	{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
				1622	};
				1623
				1624	importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1625	CHECK(importedInputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1626	importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1627	CHECK(importedOutputIds.size() == 1);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1628	// Do the inference and force the import as the memory is aligned.
Colm Donelan	d7ceec5	2022-07-06 12:09:05 +0100	[diff] [blame]	1629	runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan	1682971	2022-02-03 17:04:59 +0000	[diff] [blame]	1630
				1631	// Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
				1632	// We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
				1633	profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
				1634	dump = ss.str();
				1635
				1636	if (backends[0] == Compute::CpuAcc)
				1637	{
				1638	// Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
				1639	// reconfigure is implemented
				1640	int count = SubStringCounter(dump, "SyncMemGeneric");
				1641	CHECK(count == 0);
				1642	// Should be 2 CopyMemGeneric workloads
				1643	count = SubStringCounter(dump, "CopyMemGeneric");
				1644	CHECK(count >= 1);
				1645	}
				1646	else
				1647	{
				1648	// Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
				1649	// SyncMemGeneric Workload when we previously didn't
				1650	int count = SubStringCounter(dump, "SyncMemGeneric");
				1651	CHECK(count >= 1);
				1652	// Should still be some CopyMemGeneric Workloads from the last inference
				1653	count = SubStringCounter(dump, "CopyMemGeneric");
				1654	CHECK(count >= 1);
				1655	}
				1656	// Check the output is correct
				1657	CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
				1658	// Clean up to avoid interfering with other tests
				1659	runtime->UnloadNetwork(netId);
				1660	}
				1661
Nattapat Chaimanowong	1fcb4ff	2019-01-24 15:25:26 +0000	[diff] [blame]	1662	} // anonymous namespace