//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <QuantizeHelper.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <armnn/ArmNN.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/Permute.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>

#include <backendsCommon/test/DataLayoutUtils.hpp>
#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <boost/numeric/conversion/cast.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

// 4-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias4({1, 2, 3, 4});

// 8-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 (depending on the number of
// output channels) or an empty vector if bias is disabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};

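// Usage sketch (illustrative only; these aliases are not consumed by the tests
// below): the trait is read as a dependent type and resolves to the bias type
// that matches the input type.
using ExampleFloatBiasType = FullyConnectedBiasTypeForInputType<float>::Type;   // float
using ExampleUint8BiasType = FullyConnectedBiasTypeForInputType<uint8_t>::Type; // int32_t
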
// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
               const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                BOOST_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}

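// Worked example of the arithmetic above (the values are hypothetical): with
// vScale = 0.5 and vOffset = 0, a stored value of 6 dequantizes to 3.0f; adding
// a dequantized bias of 1.0f and re-quantizing gives round(4.0f / 0.5f) + 0 = 8.
// For non-quantized types the Selective* helpers pass values through unchanged,
// so the loop reduces to a plain per-channel addition.
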
//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const boost::multi_array<T, 4>& originalInput,
    const boost::multi_array<T, 4>& originalKernel,
    const boost::multi_array<B, 1>& bias,
    const boost::multi_array<T, 4>& originalOutputExpected,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    boost::ignore_unused(memoryManager);
    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);

    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);

    unsigned int kernelHeight   = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
    unsigned int kernelWidth    = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    BOOST_ASSERT(inputNum == 1);
    BOOST_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
        armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    LayerTestResult<T, 4> ret(outputTensorInfo);

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // If required, permute the input data at this point.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    // Construct expected output data - two identical images.
    std::vector<T> outputData;
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());

    // If required, permute the expected output at this point.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
        outputData = tmp;
    }
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    // Permute the kernel if necessary.
    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    if(biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

366template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
367 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
368LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
369 armnn::IWorkloadFactory& workloadFactory,
370 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
371 const boost::multi_array<T, 4>& input,
372 const boost::multi_array<T, 4>& kernel,
373 const boost::multi_array<B, 1>& bias,
374 const boost::multi_array<T, 4>& outputExpected,
375 const armnn::DataLayout dataLayout,
376 float qScale,
377 int32_t qOffset,
378 uint32_t padLeft = 1,
379 uint32_t padTop = 1,
380 uint32_t padRight = 1,
381 uint32_t padBottom = 1,
382 uint32_t strideX = 1,
383 uint32_t strideY = 1)
384{
Derek Lambertic374ff02019-12-10 21:57:35 +0000385 boost::ignore_unused(qScale, qOffset);
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +0100386 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
387 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
388 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
389 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
390
391 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
392 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
393 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
394 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
395
396 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
397 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
398 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
399 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
400
401 bool biasEnabled = bias.size() > 0;
402
403 // Creates the tensors.
404 armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
405 armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
406 ArmnnType);
407 armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
408 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
409
410 // Construct the input data.
411 std::vector<T> inputData;
412 inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
413 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
414
415 // Construct the output data, with bias applied, as appropriate.
416 std::vector<T> outputData;
417 outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
418
419 LayerTestResult<T, 4> ret(outputTensorInfo);
420 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
421
422 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
423 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
424
425 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
426 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
427
428 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
429
430 armnn::Convolution2dQueueDescriptor data;
431
432 data.m_Weight = &weightsTensor;
433 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
434 data.m_Parameters.m_StrideX = strideX;
435 data.m_Parameters.m_StrideY = strideY;
436 data.m_Parameters.m_PadLeft = padLeft;
437 data.m_Parameters.m_PadRight = padRight;
438 data.m_Parameters.m_PadTop = padTop;
439 data.m_Parameters.m_PadBottom = padBottom;
440 data.m_Parameters.m_BiasEnabled = biasEnabled;
441 data.m_Parameters.m_DataLayout = dataLayout;
442
443 armnn::WorkloadInfo info;
444 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
445 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
446
447 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
448 inputHandle->Allocate();
449 outputHandle->Allocate();
450
451 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
452
453 ExecuteWorkload(*workload, memoryManager);
454
455 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
456
457 return ret;
458}
459
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;
    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // I don't anticipate this being particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point dimension doesn't matter.

    unsigned int batchSize = 1;
    unsigned int inputChannels = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize = 3;
    unsigned int padSize = 2;
    unsigned int stride = 1;
    unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
                  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
                  1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
    armnn::ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    // Output
    LayerTestResult<T,4> ret(outputInfo);
    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
    return ret;
}

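// For reference (an illustrative note, not original to this file): the 1D shapes
// above are packed into 4D NCHW tensors with the width fixed at 1 -
// input [1, 2, 5] -> [1, 2, 5, 1] and kernel [3, 2, 3] -> [3, 2, 3, 1] - so the
// 1D stride and padding are expressed through StrideY/PadTop/PadBottom.
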
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    boost::ignore_unused(biasEnabled);
    // Use a single-batch 1-channel 3x4 (HxW) input image, in NHWC layout.
    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    });

    // Use a single 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is 1 batch of a 1-channel 3x4 (HxW) image.
    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);

    const std::vector<float> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    boost::ignore_unused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    });

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);

    const std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX = 2;
    uint32_t strideY = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,
            12,22,32,
            13,23,33
        },
        qScale, qOffset)));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,
            -12,-22,
        },
        qScale, qOffset)));

// Expected output is 1 batch of a 1-channel 6x8 image.
// Manually calculated like this:
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
//[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[.....  .....  ..... ..... ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            0, 0, 0, 0, 0, 0,
            -242, -594, -934, -372, 0, 0,
            -495, -1190, -1850, -725, 0, 0,
            -538, -1256, -1916, -748, 0, 0,
            -273, -626, -946, -363, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        2,  // Padding top.
        3,  // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        }, qScale, qOffset)));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -7140, -10580, -13940, -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144, -9318, -5152,
            -5032, -7256, -9376, -6142, -3368,
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    bool biasEnabled = false
)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QuantisedAsymm8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QuantisedSymm16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
                                                                    inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset())));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
                                                                     kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset())));
    auto expectedOutput =
        MakeTensor<T, 4>(outputTensorInfo,
                         std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                                           outputTensorInfo.GetQuantizationScale(),
                                                           outputTensorInfo.GetQuantizationOffset())));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // With a dilation rate of 3, the 3x3 kernel has an effective size of
    // d(K - 1) + 1 = 3 x (3 - 1) + 1 = 7x7, so the output is 4x4:
    // (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, // padLeft
        0, // padTop
        0, // padRight
        0, // padBottom
        1, // strideX
        1, // strideY
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // With a dilation rate of 3, each 3x3 kernel has an effective size of
    // d(K - 1) + 1 = 3 x (3 - 1) + 1 = 7x7, so the output is 4x4:
    // (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        6., 4., 4., 4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, // padLeft
        0, // padTop
        0, // padRight
        0, // padBottom
        1, // strideX
        1, // strideY
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // With a dilation rate of 2, the 2x2 kernel has an effective size of
    // d(K - 1) + 1 = 2 x (2 - 1) + 1 = 3x3, so the output is 4x4:
    // trunc((I - K_eff + 2P)/S) + 1 => trunc((10 - 3 + 2 x 1)/3) + 1 = 4,
    // where dilation d = 2, kernel K = 2, input I = 10, padding P = 1 per side, stride S = 3.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4, 7, 7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2, 3, 3, 1
    };
    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        2,
        2,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        3,
        3,
        biasEnabled
    );
}

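// A minimal sketch (illustration only; the helper name below is not part of the
// Arm NN API) of the output-size arithmetic used by the dilation tests above:
constexpr uint32_t DilatedConvOutputSize(uint32_t inputSize, uint32_t kernelSize,
                                         uint32_t dilation, uint32_t padTotal, uint32_t stride)
{
    // Effective kernel size after dilation is d * (K - 1) + 1; integer division truncates.
    return (inputSize - (dilation * (kernelSize - 1) + 1) + padTotal) / stride + 1;
}
// e.g. DilatedConvOutputSize(10, 3, 3, 0, 1) == 4 and DilatedConvOutputSize(10, 2, 2, 2, 3) == 4.
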
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> CompareConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int inputHeight = 8;
    unsigned int inputWidth = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX = 1;
    unsigned int padY = 1;

    unsigned int outputNum = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
    unsigned int biasShape[] = {outputChannels};

    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
    kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
    biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);

    LayerTestResult<T,4> ret(outputTensorInfo);

    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
    auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);

    armnn::Convolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

1376//
1377// DepthwiseConvolution2d implementations
1378//
1379
1380template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1381 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1382LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1383 armnn::IWorkloadFactory& workloadFactory,
1384 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1385 const boost::multi_array<T, 4>& input,
1386 const boost::multi_array<T, 4>& kernel,
1387 const boost::multi_array<B, 1>& bias,
1388 const boost::multi_array<T, 4>& outputExpected,
1389 float qScale,
1390 int32_t qOffset,
1391 const armnn::DataLayout layout,
1392 uint32_t padLeft = 0,
1393 uint32_t padTop = 0,
1394 uint32_t padRight = 0,
1395 uint32_t padBottom = 0,
1396 uint32_t strideX = 1,
1397 uint32_t strideY = 1)
1398{
1399 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1400 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1401 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1402 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1403 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1404 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1405 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1406 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1407 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1408 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1409 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1410 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1411
1412 // If a bias is used, its size must equal the number of output channels.
1413 bool biasEnabled = bias.size() > 0;
1414 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1415
1416 // Creates the tensors.
1417 armnn::TensorInfo inputTensorInfo =
1418 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1419 armnn::TensorInfo outputTensorInfo =
1420 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1421 armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1422 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1423
1424 // Set quantization parameters if the requested type is a quantized type.
1425 if (armnn::IsQuantizedType<T>())
1426 {
1427 inputTensorInfo.SetQuantizationScale(qScale);
1428 inputTensorInfo.SetQuantizationOffset(qOffset);
1429 outputTensorInfo.SetQuantizationScale(qScale);
1430 outputTensorInfo.SetQuantizationOffset(qOffset);
1431 kernelDesc.SetQuantizationScale(qScale);
1432 kernelDesc.SetQuantizationOffset(qOffset);
1433 biasDesc.SetQuantizationScale(qScale*qScale);
1434 biasDesc.SetQuantizationOffset(0);
1435 }
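        // (The bias scale follows the usual convention for quantized convolution: the bias is
        // accumulated in the input * weight domain, so its scale is inputScale * weightScale,
        // i.e. qScale * qScale here, with a zero offset.)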
1436
1437 // Construct the input data.
1438 std::vector<T> inputData;
1439 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1440
1441    // If the requested layout is NHWC, permute the input data to that layout.
1442 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1443 if (layout == armnn::DataLayout::NHWC)
1444 {
1445 std::vector<T> tmp(inputData.size());
1446 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1447 inputData = tmp;
1448 }
1449
1450 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1451
1452 // Construct the output data, with bias applied, as appropriate.
1453 std::vector<T> outputData;
1454 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1455 if (biasEnabled)
1456 {
1457 std::vector<T> biasV;
1458 biasV.assign(bias.data(), bias.data() + outputChannels);
1459 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1460 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1461 outputWidth, outputHeight);
1462 }
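    // ApplyBias (defined earlier in this file) adds each channel's bias value to every
    // element of that channel's outputWidth x outputHeight plane, using the scales and
    // offsets passed in to handle quantized data types.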
1463
1464 LayerTestResult<T, 4> ret(outputTensorInfo);
1465
1466    // If the requested layout is NHWC, permute the expected output to that layout.
1467 if (layout == armnn::DataLayout::NHWC)
1468 {
1469 std::vector<T> tmp(outputData.size());
1470 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1471 outputData = tmp;
1472 }
1473
1474 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1475
1476 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1477 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1478
1479 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1480
1481 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1482
1483 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1484 if (biasEnabled)
1485 {
1486 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1487 }
1488
1489 armnn::DepthwiseConvolution2dQueueDescriptor data;
1490 data.m_Weight = &weightsTensor;
1491 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
1492 data.m_Parameters.m_StrideX = strideX;
1493 data.m_Parameters.m_StrideY = strideY;
1494 data.m_Parameters.m_PadLeft = padLeft;
1495 data.m_Parameters.m_PadRight = padRight;
1496 data.m_Parameters.m_PadTop = padTop;
1497 data.m_Parameters.m_PadBottom = padBottom;
1498 data.m_Parameters.m_BiasEnabled = biasEnabled;
1499 data.m_Parameters.m_DataLayout = layout;
1500
1501 armnn::WorkloadInfo info;
1502 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1503 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1504
1505 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1506 inputHandle->Allocate();
1507 outputHandle->Allocate();
1508
1509 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1510
1511 ExecuteWorkload(*workload, memoryManager);
1512
1513 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1514
1515 return ret;
1516}
1517
1518template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1519LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1520 armnn::IWorkloadFactory& workloadFactory,
1521 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1522 float qScale,
1523 int32_t qOffset,
1524 bool biasEnabled,
1525 const armnn::DataLayout layout)
1526{
1527 using B = armnn::ResolveType<ArmnnBType>;
1528
1529 unsigned int inputHeight = 3;
1530 unsigned int inputWidth = 3;
1531 unsigned int inputChannels = 2;
1532 unsigned int inputNum = 1;
1533
1534 unsigned int kernelHeight = 3;
1535 unsigned int kernelWidth = 3;
1536 unsigned int kernelChannels = inputChannels;
1537 unsigned int kernelDepthMultiplier = 1;
1538
1539 unsigned int outputHeight = 1;
1540 unsigned int outputWidth = 1;
1541 unsigned int outputChannels = kernelChannels;
1542 unsigned int outputNum = inputNum;
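    // A 3x3 input convolved with a 3x3 kernel at stride 1 with no padding produces a single
    // output element per channel, hence the 1x1 output above: (3 - 3)/1 + 1 = 1.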
1543
1544 armnn::TensorInfo inputTensorInfo =
1545 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1546 armnn::TensorInfo outputTensorInfo =
1547 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1548 armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1549 ArmnnType);
1550 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1551
1552 // Set quantization parameters if the requested type is a quantized type.
1553 if(armnn::IsQuantizedType<T>())
1554 {
1555 inputTensorInfo.SetQuantizationScale(qScale);
1556 inputTensorInfo.SetQuantizationOffset(qOffset);
1557 outputTensorInfo.SetQuantizationScale(qScale);
1558 outputTensorInfo.SetQuantizationOffset(qOffset);
1559 kernelDesc.SetQuantizationScale(qScale);
1560 kernelDesc.SetQuantizationOffset(qOffset);
1561 biasDesc.SetQuantizationScale(qScale*qScale);
1562 biasDesc.SetQuantizationOffset(0);
1563 }
1564 std::vector<T> inputData = std::vector<T>(
1565        QuantizedVector<T>({
1566 1.f, 2.f, 1.f,
1567 2.f, 1.f, 2.f,
1568 1.f, 2.f, 1.f,
1569
1570            1.f, 2.f, 1.f,
1571 2.f, 1.f, 2.f,
1572 1.f, 2.f, 1.f,
1573 },
1574 inputTensorInfo.GetQuantizationScale(),
1575 inputTensorInfo.GetQuantizationOffset()));
1576
1577    // If the requested layout is NHWC, permute the input data to that layout.
1578 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1579 if (layout == armnn::DataLayout::NHWC)
1580 {
1581 std::vector<T> tmp(inputData.size());
1582 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1583 inputData = tmp;
1584 }
1585 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1586
1587    std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1588 biasDesc.GetQuantizationScale(),
1589 biasDesc.GetQuantizationOffset()));
1590
1591    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1592
1593 std::vector<T> kernelData = std::vector<T>(
1594        QuantizedVector<T>({
1595 1.f, 0.f, 1.f,
1596 0.f, 0.f, 0.f,
1597 -1.f, 0.f, -1.f,
1598
1599            1.f, 0.f, 1.f,
1600 0.f, 0.f, 0.f,
1601 -1.f, 0.f, -1.f,
1602 },
1603 kernelDesc.GetQuantizationScale(),
1604 kernelDesc.GetQuantizationOffset()));
1605
1606    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1607
1608 // Manually calculated.
1609 std::vector<T> outputImage(
1610        QuantizedVector<T>({ 0.f, 0.f },
1611 outputTensorInfo.GetQuantizationScale(),
1612 outputTensorInfo.GetQuantizationOffset())
1613    );
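    // For each channel the kernel's corner weights (1, 1, -1, -1) all land on input values
    // of 1, so the products cancel out: 1 + 1 - 1 - 1 = 0.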
1614
1615 // Optionally apply bias to output image.
1616 if(biasEnabled)
1617 {
1618 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1619 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1620 outputWidth, outputHeight);
1621 }
1622
1623 LayerTestResult<T, 4> ret(outputTensorInfo);
1624 if (layout == armnn::DataLayout::NHWC)
1625 {
1626 std::vector<T> tmp(outputImage.size());
1627 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1628 outputImage = tmp;
1629 }
1630
1631 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1632
1633 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1634 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1635
1636 armnn::DepthwiseConvolution2dQueueDescriptor data;
1637 armnn::WorkloadInfo info;
1638 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1639 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1640
1641 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1642 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1643
1644 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1645 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1646
1647 data.m_Weight = &weightsTensor;
1648 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1649 data.m_Parameters.m_StrideX = 1;
1650 data.m_Parameters.m_StrideY = 1;
1651 data.m_Parameters.m_PadLeft = 0;
1652 data.m_Parameters.m_PadRight = 0;
1653 data.m_Parameters.m_PadTop = 0;
1654 data.m_Parameters.m_PadBottom = 0;
1655 data.m_Parameters.m_BiasEnabled = biasEnabled;
1656 data.m_Parameters.m_DataLayout = layout;
1657
1658 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1659 inputHandle->Allocate();
1660 outputHandle->Allocate();
1661
1662 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1663
1664 ExecuteWorkload(*workload, memoryManager);
1665
1666 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1667
1668 return ret;
1669}
1670
1671template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1672LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1673 armnn::IWorkloadFactory& workloadFactory,
1674 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1675 float qScale,
1676 int32_t qOffset,
1677 bool biasEnabled,
1678 const armnn::DataLayout layout)
1679{
1680 using B = armnn::ResolveType<ArmnnBType>;
1681
1682 unsigned int depthMultiplier = 2;
1683
1684 unsigned int inputHeight = 8;
1685 unsigned int inputWidth = 16;
1686 unsigned int inputChannels = 2;
1687 unsigned int inputBatchSize = 1;
1688
1689 unsigned int kernelHeight = 5;
1690 unsigned int kernelWidth = 3;
1691
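    // The output dimensions below anticipate the workload parameters set further down:
    // the '+ 2' is padTop + padBottom (1 + 1) and the '/2' is strideX = 2, giving
    // (8 - 5 + 1) + 2 = 6 rows and (16 - 3 + 1)/2 = 7 columns.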
1692 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1693 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1694 unsigned int outputChannels = inputChannels * depthMultiplier;
1695 unsigned int outputBatchSize = inputBatchSize;
1696
1697 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1698 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1699 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1700 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1701 armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1702 ArmnnType);
1703 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1704
1705 // Set quantization parameters if the requested type is a quantized type.
1706 if(armnn::IsQuantizedType<T>())
1707 {
1708 inputTensorInfo.SetQuantizationScale(qScale);
1709 inputTensorInfo.SetQuantizationOffset(qOffset);
1710 outputTensorInfo.SetQuantizationScale(qScale);
1711 outputTensorInfo.SetQuantizationOffset(qOffset);
1712 kernelDesc.SetQuantizationScale(qScale);
1713 kernelDesc.SetQuantizationOffset(qOffset);
1714 biasDesc.SetQuantizationScale(qScale*qScale);
1715 biasDesc.SetQuantizationOffset(0);
1716 }
1717
1718 // NOTE: originalInputData is in NCHW format
1719 std::vector<T> originalInputData = std::vector<T>(
1720        QuantizedVector<T>({
1721 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1722 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1723 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1724 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1725 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1726 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1727 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1728 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1729 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1730 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1731 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1732 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1733 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1734 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1735 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1736 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1737 },
1738 inputTensorInfo.GetQuantizationScale(),
1739 inputTensorInfo.GetQuantizationOffset()));
1740
1741    std::vector<T> inputData = originalInputData;
1742    // If the requested layout is NHWC, permute the input data to that layout.
1743 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1744 if (layout == armnn::DataLayout::NHWC)
1745 {
1746 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1747 originalInputData.data(), inputData.data(), sizeof(T));
1748 }
1749 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1750
1751    std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1752 biasDesc.GetQuantizationScale(),
1753 biasDesc.GetQuantizationOffset());
1754
1755    auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1756
1757 std::vector<T> kernelData = std::vector<T>(
1758        QuantizedVector<T>({
1759 1, 1, 1,
1760 1, -1, 1,
1761 1, 1, 1,
1762 1, 1, 1,
1763 1, 1, 1,
1764
1765            2, 2, 2,
1766 2, 2, 2,
1767 2, 2, 2,
1768 2, 2, 2,
1769 2, 2, 2,
1770
1771            0, 0, 0,
1772 0, -1, 0,
1773 0, 0, 0,
1774 0, 0, 0,
1775 0, 0, 0,
1776
1777            0, 0, 0,
1778 0, 0, 0,
1779 0, 1, 0,
1780 0, 0, 0,
1781 0, 0, 0
1782 },
1783 kernelDesc.GetQuantizationScale(),
1784 kernelDesc.GetQuantizationOffset()));
1785
1786    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1787
1788 // Manually calculated.
1789 std::vector<T> originalOutputImage = std::vector<T>(
1790        QuantizedVector<T>({
1791 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1792 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1793 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1794 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1795 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1796 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1797
1798 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1799             0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
1800            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1801 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1802 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1803 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1804
1805             8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1806            10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810             8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1811
1812             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1813 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1814 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1815 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1816 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1817 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1818 },
1819 outputTensorInfo.GetQuantizationScale(),
1820 outputTensorInfo.GetQuantizationOffset()));
1821
1822 // Optionally apply bias to output image.
1823 if(biasEnabled)
1824 {
1825 ApplyBias(originalOutputImage,
1826 outputTensorInfo.GetQuantizationScale(),
1827 outputTensorInfo.GetQuantizationOffset(),
1828 biasV,
1829 biasDesc.GetQuantizationScale(),
1830 biasDesc.GetQuantizationOffset(),
1831 outputWidth,
1832 outputHeight);
1833 }
1834
1835 LayerTestResult<T, 4> ret(outputTensorInfo);
1836 std::vector<T> outputImage = originalOutputImage;
1837 if (layout == armnn::DataLayout::NHWC)
1838 {
1839 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1840 originalOutputImage.data(), outputImage.data(), sizeof(T));
1841 }
1842
1843 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1844
1845 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1846 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1847
1848 armnn::DepthwiseConvolution2dQueueDescriptor data;
1849 armnn::WorkloadInfo info;
1850 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1851 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1852
1853 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1854 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1855
1856 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1857 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1858
1859 data.m_Weight = &weightsTensor;
1860 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1861 data.m_Parameters.m_StrideX = 2;
1862 data.m_Parameters.m_StrideY = 1;
1863 data.m_Parameters.m_PadLeft = 0;
1864 data.m_Parameters.m_PadRight = 0;
1865 data.m_Parameters.m_PadTop = 1;
1866 data.m_Parameters.m_PadBottom = 1;
1867 data.m_Parameters.m_BiasEnabled = biasEnabled;
1868 data.m_Parameters.m_DataLayout = layout;
1869
1870 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1871 inputHandle->Allocate();
1872 outputHandle->Allocate();
1873
1874 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1875
1876 ExecuteWorkload(*workload, memoryManager);
1877
1878 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1879
1880 return ret;
1881}
1882
1883template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1884 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1885LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1886 armnn::IWorkloadFactory& workloadFactory,
1887 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1888 const boost::multi_array<T, 4>& originalInput,
1889 const boost::multi_array<T, 4>& originalKernel,
1890 const boost::multi_array<B, 1>& bias,
1891 const boost::multi_array<T, 4>& originalOutputExpected,
1892 float qScale,
1893 int32_t qOffset,
1894 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1895 uint32_t padLeft = 0,
1896 uint32_t padTop = 0,
1897 uint32_t padRight = 0,
1898 uint32_t padBottom = 0,
1899 uint32_t strideX = 1,
1900 uint32_t strideY = 1,
1901 uint32_t dilationX = 1,
1902 uint32_t dilationY = 1)
1903{
1904 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1905 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1906 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1907 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1908
1909 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1910 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1911 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1912 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1913
1914 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1915 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1916 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1917 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1918
1919 bool biasEnabled = bias.size() > 0;
1920
1921 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1922 BOOST_ASSERT(inputNum == 1);
1923 BOOST_ASSERT(outputNum == 1);
1924
1925 // If a bias is used, its size must equal the number of output channels.
1926 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1927
1928
1929 // Note these tensors will use two (identical) batches.
1930 armnn::TensorInfo inputTensorInfo =
1931 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1932 armnn::TensorInfo outputTensorInfo =
1933 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1934
1935    // For depthwise convolution the kernel is always in NCHW layout, regardless of the input and output layout.
1936 armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
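    // i.e. [depth multiplier M, input channels C, kernel height, kernel width];
    // the layer produces M * C output channels.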
1937
1938 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1939
1940 // Set quantization parameters if the requested type is a quantized type.
1941 if(armnn::IsQuantizedType<T>())
1942 {
1943 inputTensorInfo.SetQuantizationScale(qScale);
1944 inputTensorInfo.SetQuantizationOffset(qOffset);
1945 outputTensorInfo.SetQuantizationScale(qScale);
1946 outputTensorInfo.SetQuantizationOffset(qOffset);
1947 kernelDesc.SetQuantizationScale(qScale);
1948 kernelDesc.SetQuantizationOffset(qOffset);
1949 biasDesc.SetQuantizationScale(qScale*qScale);
1950 biasDesc.SetQuantizationOffset(0);
1951 }
1952
1953 LayerTestResult<T, 4> ret(outputTensorInfo);
1954
1955 // Construct input data
1956 std::vector<T> input;
1957 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1958 std::vector<T> inputData;
1959 inputData.insert(inputData.end(), input.begin(), input.end());
1960 inputData.insert(inputData.end(), input.begin(), input.end());
1961
1962    // If the requested layout is NHWC, permute the input data to that layout.
1963 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1964 if (layout == armnn::DataLayout::NHWC)
1965 {
1966 std::vector<T> tmp(inputData.size());
1967 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1968 inputData = tmp;
1969 }
1970
1971 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1972
1973 std::vector<T> output;
1974 output.assign(originalOutputExpected.data(),
1975 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1976
1977 // Apply bias to output data if it is enabled.
1978 if(biasEnabled)
1979 {
1980 std::vector<T> biasV;
1981 biasV.assign(bias.data(), bias.data() + outputChannels);
1982 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1983 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1984 outputWidth, outputHeight);
1985 }
1986
1987 // Construct expected output data
1988 std::vector<T> outputData;
1989 outputData.insert(outputData.end(), output.begin(), output.end());
1990 outputData.insert(outputData.end(), output.begin(), output.end());
1991
1992    // If the requested layout is NHWC, permute the expected output to that layout.
1993 if (layout == armnn::DataLayout::NHWC)
1994 {
1995 std::vector<T> tmp(outputData.size());
1996 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1997 outputData = tmp;
1998 }
1999 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
2000
2001 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2002 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2003
2004 armnn::DepthwiseConvolution2dQueueDescriptor data;
2005 armnn::WorkloadInfo info;
2006 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2007 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2008
2009 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2010 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2011
2012 if(biasEnabled)
2013 {
2014 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2015 }
2016
2017 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2018 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2019
2020 data.m_Weight = &weightsTensor;
2021 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
2022 data.m_Parameters.m_StrideX = strideX;
2023 data.m_Parameters.m_StrideY = strideY;
2024 data.m_Parameters.m_PadLeft = padLeft;
2025 data.m_Parameters.m_PadRight = padRight;
2026 data.m_Parameters.m_PadTop = padTop;
2027 data.m_Parameters.m_PadBottom = padBottom;
2028 data.m_Parameters.m_BiasEnabled = biasEnabled;
2029 data.m_Parameters.m_DataLayout = layout;
2030 data.m_Parameters.m_DilationX = dilationX;
2031 data.m_Parameters.m_DilationY = dilationY;
2032
2033 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2034 inputHandle->Allocate();
2035 outputHandle->Allocate();
2036
2037 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2038
2039 ExecuteWorkload(*workload, memoryManager);
2040
2041 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2042
2043 return ret;
2044}
2045
2046template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2047 typename T = armnn::ResolveType<ArmnnType>>
2048LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2049 armnn::IWorkloadFactory& workloadFactory,
2050 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2051 float qScale,
2052 int32_t qOffset,
2053 bool biasEnabled,
2054 const armnn::DataLayout layout)
2055{
2056 // Use a single-batch 2-channel 5x5 image as input.
2057 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2058 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2059        QuantizedVector<T>({
2060             0,  1,  2,  3,  4,
2061 5, 6, 7, 8, 9,
2062 10, 11, 12, 13, 14,
2063 15, 16, 17, 18, 19,
2064 20, 21, 22, 23, 24,
2065
2066 25, 26, 27, 28, 29,
2067 30, 31, 32, 33, 34,
2068 35, 36, 37, 38, 39,
2069 40, 41, 42, 43, 44,
2070 45, 46, 47, 48, 49
2071        },
2072 inputTensorInfo.GetQuantizationScale(),
2073 inputTensorInfo.GetQuantizationOffset())));
2074
2075 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2076 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2077 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2078        QuantizedVector<T>({
2079            32, 31, 30, 29,
2080 28, 27, 26, 25,
2081 24, 23, 22, 21,
2082 20, 19, 18, 17,
2083
2084 16, 15, 14, 13,
2085 12, 11, 10, 9,
2086 8, 7, 6, 5,
2087 4, 3, 2, 1
2088        },
2089 kernelTensorInfo.GetQuantizationScale(),
2090 kernelTensorInfo.GetQuantizationOffset())));
2091
2092 // Expected output is 1 batch of a 2-channel 5x5 image.
2093    // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2094 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2095 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2096        QuantizedVector<T>({
2097            1062, 1580, 1850, 1530, 1117,
2098 2140, 3108, 3500, 2842, 2042,
2099 3580, 5068, 5460, 4342, 3062,
2100 3618, 5072, 5390, 4248, 2971,
2101 3074, 4282, 4510, 3533, 2457,
2102
2103 1550, 2284, 2362, 1955, 1428,
2104 2910, 4206, 4342, 3528, 2536,
2105 3390, 4886, 5022, 4068, 2916,
2106 3566, 5056, 5182, 4133, 2922,
2107 3100, 4352, 4452, 3517, 2465
2108        },
2109 outputTensorInfo.GetQuantizationScale(),
2110 outputTensorInfo.GetQuantizationOffset())));
2111
2112 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2113 workloadFactory,
2114 memoryManager,
2115 input,
2116 kernel,
2117 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2118 expectedOutput,
2119 qScale,
2120 qOffset,
2121 layout,
2122 1, // Padding left.
2123 1, // Padding top.
2124 2, // Padding right.
2125 2, // Padding bottom.
2126 1, // strideX
2127 1); // strideY
2128}
2129
2130template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2131 typename T = armnn::ResolveType<ArmnnType>>
2132LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2133 armnn::IWorkloadFactory& workloadFactory,
2134 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2135 float qScale,
2136 int32_t qOffset,
2137 bool biasEnabled)
2138{
2139 auto layout = armnn::DataLayout::NHWC;
2140
2141 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2142 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2143        QuantizedVector<T>({
2144             0,  1,  2,  3,  4,
2145 5, 6, 7, 8, 9,
2146 10, 11, 12, 13, 14,
2147 15, 16, 17, 18, 19,
2148 20, 21, 22, 23, 24,
2149
2150 25, 26, 27, 28, 29,
2151 30, 31, 32, 33, 34,
2152 35, 36, 37, 38, 39,
2153 40, 41, 42, 43, 44,
2154 45, 46, 47, 48, 49
2155        },
2156 inputTensorInfo.GetQuantizationScale(),
2157 inputTensorInfo.GetQuantizationOffset())));
2158
2159 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2160 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2161        QuantizedVector<T>({
2162            32, 31, 30, 29,
2163 28, 27, 26, 25,
2164 24, 23, 22, 21,
2165 20, 19, 18, 17,
2166
2167 16, 15, 14, 13,
2168 12, 11, 10, 9,
2169 8, 7, 6, 5,
2170 4, 3, 2, 1
2171        },
2172 kernelTensorInfo.GetQuantizationScale(),
2173 kernelTensorInfo.GetQuantizationOffset())));
2174
2175 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2176 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2177        QuantizedVector<T>({
2178            1062, 1580, 1850, 1530, 1117,
2179 2140, 3108, 3500, 2842, 2042,
2180 3580, 5068, 5460, 4342, 3062,
2181 3618, 5072, 5390, 4248, 2971,
2182 3074, 4282, 4510, 3533, 2457,
2183
2184 1550, 2284, 2362, 1955, 1428,
2185 2910, 4206, 4342, 3528, 2536,
2186 3390, 4886, 5022, 4068, 2916,
2187 3566, 5056, 5182, 4133, 2922,
2188 3100, 4352, 4452, 3517, 2465
2189        },
2190 outputTensorInfo.GetQuantizationScale(),
2191 outputTensorInfo.GetQuantizationOffset())));
2192
2193 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2194 workloadFactory,
2195 memoryManager,
2196 input,
2197 kernel,
2198 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2199 expectedOutput,
2200 qScale,
2201 qOffset,
2202 layout,
2203 1, // Padding left.
2204 1, // Padding top.
2205 2, // Padding right.
2206 2, // Padding bottom.
2207 1, // strideX
2208 1); // strideY
2209}
2210
2211template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2212 typename T = armnn::ResolveType<ArmnnType>>
2213LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2214 armnn::IWorkloadFactory& workloadFactory,
2215 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2216 float qScale,
2217 int32_t qOffset,
2218 bool biasEnabled)
2219{
2220 auto layout = armnn::DataLayout::NHWC;
2221
2222 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2223 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2224        QuantizedVector<T>({
2225 0, 0, 0, 0, 0, 0, 0, 0, 0,
2226 0, 0, 0, 0, 0, 0, 0, 0, 0,
2227 0, 0, 0, 0, 0, 0, 0, 0, 0,
2228 0, 0, 0, 1, 1, 1, 0, 0, 0,
2229 0, 0, 0, 1, 1, 1, 0, 0, 0,
2230 0, 0, 0, 1, 1, 1, 0, 0, 0,
2231 0, 0, 0, 0, 0, 0, 0, 0, 0,
2232 0, 0, 0, 0, 0, 0, 0, 0, 0,
2233 0, 0, 0, 0, 0, 0, 0, 0, 0
2234 },
2235 inputTensorInfo.GetQuantizationScale(),
2236 inputTensorInfo.GetQuantizationOffset())));
2237
2238 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2239 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2240        QuantizedVector<T>({
2241 1, 2, 3,
2242 4, 5, 6,
2243 7, 8, 9
2244 },
2245 kernelTensorInfo.GetQuantizationScale(),
2246 kernelTensorInfo.GetQuantizationOffset())));
2247
2248 uint32_t padLeft = 0;
2249 uint32_t padTop = 0;
2250 uint32_t padRight = 0;
2251 uint32_t padBottom = 0;
2252 uint32_t strideX = 1;
2253 uint32_t strideY = 1;
2254 uint32_t dilationX = 3;
2255 uint32_t dilationY = 3;
2256
2257    // With a dilation of 3 the 3x3 kernel is effectively 7x7, so the 9x9 input yields a 3x3 output of all 5s: only the kernel centre (weight 5) ever overlaps the block of 1s.
2258 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2259 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2260        QuantizedVector<T>({
2261 5, 5, 5,
2262 5, 5, 5,
2263 5, 5, 5
2264 },
2265 outputTensorInfo.GetQuantizationScale(),
2266 outputTensorInfo.GetQuantizationOffset())));
2267
2268 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2269 workloadFactory,
2270 memoryManager,
2271 input,
2272 kernel,
2273 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2274 expectedOutput,
2275 qScale,
2276 qOffset,
2277 layout,
2278 padLeft,
2279 padTop,
2280 padRight,
2281 padBottom,
2282 strideX,
2283 strideY,
2284 dilationX,
2285 dilationY);
2286}
2287
2288template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2289LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2290 armnn::IWorkloadFactory& workloadFactory,
2291 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2292 const std::vector<float>& inputNoQuantizedValues,
2293 armnn::TensorInfo& inputTensorInfo,
2294 const std::vector<float>& kernelNoQuantizedValues,
2295 armnn::TensorInfo& kernelTensorInfo,
2296 const std::vector<float>& outputExpectedNoQuantizedValues,
2297 armnn::TensorInfo& outputTensorInfo,
2298 uint32_t dilationX,
2299 uint32_t dilationY,
2300 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2301 bool biasEnabled = false)
2302{
2303 float qScale;
2304 int32_t qOffset;
2305 switch (ArmnnType)
2306 {
2307 case armnn::DataType::QuantisedAsymm8:
2308 {
2309 qScale = 0.1f;
2310 qOffset = 128;
2311 break;
2312 }
2313 case armnn::DataType::QuantisedSymm16:
2314 {
2315 qScale = 0.1f;
2316 qOffset = 0;
2317 break;
2318 }
2319 case armnn::DataType::Float32:
2320 default:
2321 {
2322 qScale = 0.f;
2323 qOffset = 0;
2324 break;
2325 }
2326 }
2327
2328 inputTensorInfo.SetQuantizationScale(qScale);
2329 inputTensorInfo.SetQuantizationOffset(qOffset);
2330 kernelTensorInfo.SetQuantizationScale(qScale);
2331 kernelTensorInfo.SetQuantizationOffset(qOffset);
2332 outputTensorInfo.SetQuantizationScale(qScale);
2333 outputTensorInfo.SetQuantizationOffset(qOffset);
2334
2335 auto input = MakeTensor<T, 4>(inputTensorInfo,
2336                                  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2337 inputTensorInfo.GetQuantizationScale(),
2338 inputTensorInfo.GetQuantizationOffset())));
2339    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2340                                   std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2341 kernelTensorInfo.GetQuantizationScale(),
2342 kernelTensorInfo.GetQuantizationOffset())));
2343 auto expectedOutput =
2344 MakeTensor<T, 4>(outputTensorInfo,
2345 std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2346 outputTensorInfo.GetQuantizationScale(),
2347 outputTensorInfo.GetQuantizationOffset())));
2348
2349 uint32_t padLeft = 0;
2350 uint32_t padTop = 0;
2351 uint32_t padRight = 0;
2352 uint32_t padBottom = 0;
2353 uint32_t strideX = 1;
2354 uint32_t strideY = 1;
2355
2356 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2357 workloadFactory,
2358 memoryManager,
2359 input,
2360 kernel,
2361 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2362 expectedOutput,
2363 qScale,
2364 qOffset,
2365 layout,
2366 padLeft,
2367 padTop,
2368 padRight,
2369 padBottom,
2370 strideX,
2371 strideY,
2372 dilationX,
2373 dilationY);
2374}
2375
2376template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2377LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2378 armnn::IWorkloadFactory& workloadFactory,
2379 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2380 bool biasEnabled,
2381 const armnn::DataLayout layout)
2382{
2383 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2384 std::vector<float> inputNoQuantizedValues =
2385 {
2386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2389 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2390 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2391 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2396 };
2397
2398 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2399 std::vector<float> kernelNoQuantizedValues =
2400 {
2401 1, 2, 3,
2402 4, 5, 6,
2403 7, 8, 9
2404 };
2405
2406    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7: (K - 1) * D + 1 = 7.
2407    // The output is therefore 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2408 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2409 std::vector<float> outputExpectedNoQuantizedValues =
2410 {
2411 6., 5., 5., 5.,
2412 6., 5., 5., 5.,
2413 6., 5., 5., 5.,
2414 3., 2., 2., 2.
2415 };
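    // Each output position samples the input at offsets {0, 3, 6} in each direction, so at
    // most one sample falls inside the 3x3 block of 1s and each result is simply the kernel
    // weight at that sample: 5 at the interior, 6 down the first column, 2 and 3 along the
    // bottom row.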
2416
2417 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2418 workloadFactory,
2419 memoryManager,
2420 inputNoQuantizedValues,
2421 inputTensorInfo,
2422 kernelNoQuantizedValues,
2423 kernelTensorInfo,
2424 outputExpectedNoQuantizedValues,
2425 outputTensorInfo,
2426 3,
2427 3,
2428 layout,
2429 biasEnabled);
2430}
2431
2432template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2433LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2434 armnn::IWorkloadFactory& workloadFactory,
2435 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2436 bool biasEnabled,
2437 const armnn::DataLayout layout)
2438{
2439 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2440 std::vector<float> inputNoQuantizedValues =
2441 {
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2445 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2446 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2447 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2452
2453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2456 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2457 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2458 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2463 };
2464
2465 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2466 std::vector<float> kernelNoQuantizedValues =
2467 {
2468 1, 2, 3,
2469 4, 5, 6,
2470 7, 8, 9,
2471
2472 1, 2, 3,
2473 4, 5, 6,
2474 7, 8, 9
2475 };
2476
2477    // With a dilation of 3 the 3x3 kernel has an effective size of 7x7,
2478    // so each of the 2 channels gives a 4x4 output: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2479 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2480 std::vector<float> outputExpectedNoQuantizedValues =
2481 {
2482 6., 5., 5., 5.,
2483 6., 5., 5., 5.,
2484 6., 5., 5., 5.,
2485 3., 2., 2., 2.,
2486
2487 6., 5., 5., 5.,
2488 6., 5., 5., 5.,
2489 6., 5., 5., 5.,
2490 3., 2., 2., 2.
2491 };
2492
2493 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2494 workloadFactory,
2495 memoryManager,
2496 inputNoQuantizedValues,
2497 inputTensorInfo,
2498 kernelNoQuantizedValues,
2499 kernelTensorInfo,
2500 outputExpectedNoQuantizedValues,
2501 outputTensorInfo,
2502 3,
2503 3,
2504 layout,
2505 biasEnabled);
2506}
2507
2508template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2509LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2510 armnn::IWorkloadFactory& workloadFactory,
2511 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2512 bool biasEnabled,
2513 const armnn::DataLayout layout)
2514{
2515 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2516 std::vector<float> inputNoQuantizedValues =
2517 {
2518 10.0, 10.0, 10.0,
2519 10.0, 10.0, 10.0,
2520 10.0, 10.0, 10.0,
2521
2522 21.0, 22.0, 23.0,
2523 24.0, 25.0, 26.0,
2524 27.0, 28.0, 29.0
2525 };
2526
2527 armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2528
2529 std::vector<float> kernelNoQuantizedValues =
2530 {
2531 0.25f, 0.25f,
2532 0.25f, 0.25f,
2533
2534 0.25f, 0.25f,
2535 0.25f, 0.25f,
2536
2537 0.0f , 0.0f,
2538 0.0f , 0.1f,
2539
2540 0.0f , 0.0f,
2541 0.0f , 0.1f,
2542
2543 0.2f , 0.0f,
2544 0.0f , 0.0f,
2545
2546 0.2f , 0.0f,
2547 0.0f , 0.0f,
2548
2549 0.0f , 0.3f,
2550 0.0f , 0.0f,
2551
2552 0.0f , 0.3f,
2553 0.0f , 0.0f
2554 };
2555
2556 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2557 std::vector<float> outputExpectedNoQuantizedValues =
2558 {
2559 10.f, 10.f,
2560 10.f, 10.f,
2561
2562 1.f, 1.f,
2563 1.f, 1.f,
2564
2565 2.f, 2.f,
2566 2.f, 2.f,
2567
2568 3.f, 3.f,
2569 3.f, 3.f,
2570
2571 23.f, 24.f,
2572 26.f, 27.f,
2573
2574 2.5f, 2.6000001f,
2575 2.8f, 2.9f,
2576
2577 4.2000003f, 4.4f,
2578 4.8f, 5.f,
2579
2580 6.6000004f, 6.9f,
2581 7.5000005f, 7.8f
2582 };
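    // The 8 output channels are ordered channel-major: input channel c combined with
    // depth-multiplier index m maps to output channel c * 4 + m. For example, output
    // channel 4 (c = 1, m = 0) averages each 2x2 window of the second input channel:
    // (21 + 22 + 24 + 25) * 0.25 = 23.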
2583
2584
2585 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2586 workloadFactory,
2587 memoryManager,
2588 inputNoQuantizedValues,
2589 inputTensorInfo,
2590 kernelNoQuantizedValues,
2591 kernelTensorInfo,
2592 outputExpectedNoQuantizedValues,
2593 outputTensorInfo,
2594 1,
2595 1,
2596 layout,
2597 biasEnabled);
2598}
2599
2600template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2601LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2602 armnn::IWorkloadFactory& workloadFactory,
2603 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2604 bool biasEnabled,
2605 const armnn::DataLayout layout)
2606{
2607 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2608 std::vector<float> inputNoQuantizedValues =
2609 {
2610 10.0, 10.0, 10.0,
2611 10.0, 10.0, 10.0,
2612 10.0, 10.0, 10.0,
2613
2614 21.0, 22.0, 23.0,
2615 24.0, 25.0, 26.0,
2616 27.0, 28.0, 29.0
2617 };
2618
2619 armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2620
2621 std::vector<float> kernelNoQuantizedValues =
2622 {
2623 0.25f, 0.25f,
2624 0.25f, 0.25f,
2625
2626 0.2f , 0.0f,
2627 0.0f , 0.0f,
2628
2629 0.0f , 0.0f,
2630 0.0f , 0.1f,
2631
2632 0.0f , 0.3f,
2633 0.0f , 0.0f
2634
2635 };
2636
2637 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2638 std::vector<float> outputExpectedNoQuantizedValues =
2639 {
2640 10.f, 10.f,
2641 10.f, 10.f,
2642
2643 1.f, 1.f,
2644 1.f, 1.f,
2645
2646 4.2000003f, 4.4f,
2647 4.8f, 5.f,
2648
2649 6.6000004f, 6.9f,
2650 7.5000005f, 7.8f
2651 };
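    // As in the Mult4 test above, the output channels are ordered channel-major:
    // output channel = c * 2 + m.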
2652
2653
2654 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2655 workloadFactory,
2656 memoryManager,
2657 inputNoQuantizedValues,
2658 inputTensorInfo,
2659 kernelNoQuantizedValues,
2660 kernelTensorInfo,
2661 outputExpectedNoQuantizedValues,
2662 outputTensorInfo,
2663 1,
2664 1,
2665 layout,
2666 biasEnabled);
2667}
2668
2669template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2670LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2671 armnn::IWorkloadFactory& workloadFactory,
2672 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2673 armnn::IWorkloadFactory& refWorkloadFactory,
2674 const armnnUtils::DataLayoutIndexed& layout)
2675{
2676 unsigned int inputHeight = 8;
2677 unsigned int inputWidth = 16;
2678 unsigned int inputChannels = 3;
2679 unsigned int inputNum = 5;
2680
2681 unsigned int kernelHeight = 3;
2682 unsigned int kernelWidth = 3;
2683 unsigned int channelMultiplier = 1;
2684
2685 unsigned int strideX = 2;
2686 unsigned int strideY = 3;
2687 unsigned int padX = 1;
2688 unsigned int padY = 1;
2689
2690 unsigned int outputNum = inputNum;
2691 unsigned int outputChannels = inputChannels * channelMultiplier;
2692 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2693 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
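    // Adding (stride - 1) to the numerator makes the integer division round up, so this is
    // equivalent to floor((I + 2P - K)/S) + 1: height (8 + 2 - 3 + 3)/3 = 3 and
    // width (16 + 2 - 3 + 2)/2 = 8.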
2694
2695 armnn::TensorInfo inputTensorInfo;
2696 armnn::TensorInfo outputTensorInfo;
2697 armnn::TensorInfo kernelDesc;
2698 armnn::TensorInfo biasDesc;
2699
2700
2701 std::vector<unsigned int> inputShape;
2702 std::vector<unsigned int> outputShape;
2703 std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2704 std::vector<unsigned int> biasShape{ outputChannels };
2705 switch (layout.GetDataLayout())
2706 {
2707 case armnn::DataLayout::NCHW:
2708 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2709 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2710 break;
2711        case armnn::DataLayout::NHWC:
2712 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2713 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2714 break;
2715 default:
2716 throw armnn::InvalidArgumentException("unknown data layout ["
2717 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2718 }
2719
2720 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2721 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2722 int32_t qOffset = 0;
2723
2724 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2725 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2726 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2727 biasDesc = armnn::TensorInfo(
2728 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2729
2730 LayerTestResult<T, 4> ret(outputTensorInfo);
2731
2732 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2733 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2734 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2735 biasDesc, 1028, 0.0f, 255.0f);
2736
2737 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2738 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2739
2740 armnn::DepthwiseConvolution2dQueueDescriptor data;
2741 armnn::WorkloadInfo info;
2742 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2743 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2744
2745 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2746 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2747
2748 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2749 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2750 data.m_Weight = &weightsTensor;
2751 data.m_Bias = &biasTensor;
2752 data.m_Parameters.m_StrideX = strideX;
2753 data.m_Parameters.m_StrideY = strideY;
2754 data.m_Parameters.m_PadLeft = padX;
2755 data.m_Parameters.m_PadRight = padX;
2756 data.m_Parameters.m_PadTop = padY;
2757 data.m_Parameters.m_PadBottom = padY;
2758 data.m_Parameters.m_BiasEnabled = true;
2759 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2760
2761 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2762 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2763
2764 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2765 armnn::WorkloadInfo refInfo = info;
2766 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2767 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2768
2769 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2770 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2771
2772 outputHandleRef->Allocate();
2773 inputHandleRef->Allocate();
2774
2775 inputHandle->Allocate();
2776 outputHandle->Allocate();
2777
2778 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2779 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2780
2781 ExecuteWorkload(*workload, memoryManager);
2782
2783 workloadRef->PostAllocationConfigure();
2784 workloadRef->Execute();
2785
2786 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2787 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2788
2789 return ret;
2790}
2791
2792//
2793// Explicit template specializations
2794//
2795
2796template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2797Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2798 armnn::IWorkloadFactory&,
2799 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2800 bool,
2801 armnn::DataLayout);
2802
template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

//
// Implementation functions
//
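// Each wrapper below pins down the data types and quantization parameters (scale
// and offset) for one test case, then forwards to a templated implementation
// defined earlier in this file.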

LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        armnn::DataLayout::NHWC);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
        <armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution1dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType = DataType::QuantisedAsymm8;
    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
    const DataType biasType = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

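    // The kernel is quantized per axis: quantization dimension 0 is the output-channel
    // dimension, so each of the three output channels carries its own scale.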
    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

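    // Bias scales follow the usual convention for quantized convolution:
    // bias scale = input scale * kernel scale per channel,
    // i.e. 0.5 * { 0.5, 0.75, 1.0 } = { 0.25, 0.375, 0.5 }.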
    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

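    // The expected values can be checked by hand. For output channel 2: the input
    // dequantizes to { 5, -10 } and the channel-2 weights to { 1, 2 }, so the
    // accumulation is 5 * 1 + (-10) * 2 = -15; adding the bias 4 * 0.5 = 2 gives -13,
    // which requantizes to 128 - 13 = 115.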
    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };

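    // The test data above is laid out in NHWC; permute it in place when the test
    // runs with the NCHW data layout.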
    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(kernelInfo, kernelData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

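    // Create the workload directly through the backend's workload factory, run it,
    // and read the result back from the output tensor handle.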
    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);
    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

LayerTestResult<float, 4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });

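    // Tile the same 2x2 kernel 64 times: with a single input channel this gives a
    // channel multiplier, and hence an output depth, of 64.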
    std::vector<float> kernelData;
    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
    for (unsigned int i = 0; i < 64; ++i)
    {
        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
    }
    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);

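    // Each kernel sees the whole 2x2 input, so every output element is
    // 1*1 + 2*(-1) + 3*(-1) + 4*1 = 0: the expected output is all zeros.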
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<float, 1>(),
        expectedOutput,
        0.f,
        0,
        armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        false);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType = DataType::QuantisedAsymm8;
    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
    const DataType biasType = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
    TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C

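    // Four per-axis scales, one per output channel: 2 input channels * channel multiplier 2.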
    const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
    const unsigned int quantDimension = 0;
    TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W

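    // As above, bias scale = input scale * kernel scale per channel:
    // 0.5 * { 1.0, 0.5, 1.0, 0.5 } = { 0.5, 0.25, 0.5, 0.25 }.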
    const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
    constexpr unsigned int biasQuantDimension = 0;
    TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);

    std::vector<uint8_t> inputData =
    {
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130
    };

    std::vector<int8_t> kernelData =
    {
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4, 4
    };

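    // Checked by hand for output channel 0: the first input channel dequantizes to
    // 0.5 everywhere and each of the four kernel taps to 1.0, so the accumulation is
    // 4 * 1.0 * 0.5 = 2; adding the bias 4 * 0.5 = 2 gives 4, which requantizes to
    // 128 + 4 = 132.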
    std::vector<uint8_t> expectedOutputData =
    {
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131
    };

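    // Input and output are defined in NHWC and permuted for NCHW runs; the kernel is
    // left untouched, as its [M, I, H, W] layout here does not depend on the data layout.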
    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_DilationX = 1;
    descriptor.m_DilationY = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);

    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}