//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <DataLayoutIndexed.hpp>
#include <Permute.hpp>
#include <QuantizeHelper.hpp>
#include <TensorUtils.hpp>

#include <armnn/ArmNN.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>

#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <boost/numeric/conversion/cast.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

static std::vector<float> Bias4({1, 2, 3, 4});

static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
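// Channel 0 is rows of 0.5 with a single row of zeros; channel 1 is zeros with a
// single vertical line of 1s; channel 2 is all -1s.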
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 (or an empty vector) depending on
// the number of output channels and whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
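// (A uint8 input pairs with an int32 bias because the bias is added into the
// int32 accumulator before the result is requantized.)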
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
               const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
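    // For example (illustrative values): with vScale = 0.5 and vOffset = 0, a stored
    // value of 4 dequantizes to 2.0; adding a dequantized bias of 1.0 and
    // re-quantizing gives round(3.0 / 0.5) + 0 = 6.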
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                BOOST_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}

//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const boost::multi_array<T, 4>& originalInput,
    const boost::multi_array<T, 4>& originalKernel,
    const boost::multi_array<B, 1>& bias,
    const boost::multi_array<T, 4>& originalOutputExpected,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
    unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
    unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);

    unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
    unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
    unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);

    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    BOOST_ASSERT(inputNum == 1);
    BOOST_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
        armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
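        // The bias scale is the product of the input and weight scales, so the
        // bias adds directly into the convolution's int32 accumulator.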
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    LayerTestResult<T, 4> ret(outputTensorInfo);

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // At this point, permute the input data if required.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
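    // (The mapping reads: source dimension i moves to destination dimension
    // NCHWToNHWC[i], i.e. N->0, C->3, H->1, W->2.)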
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    // Construct expected output data - two identical images.
    std::vector<T> outputData;
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());

    // At this point, permute the expected output if required.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
        outputData = tmp;
    }
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    // Permute the kernel if necessary.
    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    if(biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const boost::multi_array<T, 4>& input,
    const boost::multi_array<T, 4>& kernel,
    const boost::multi_array<B, 1>& bias,
    const boost::multi_array<T, 4>& outputExpected,
    const armnn::DataLayout dataLayout,
    float qScale,
    int32_t qOffset,
    uint32_t padLeft = 1,
    uint32_t padTop = 1,
    uint32_t padRight = 1,
    uint32_t padBottom = 1,
    uint32_t strideX = 1,
    uint32_t strideY = 1)
{
    unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
    unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
    unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);

    unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);

    unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
    unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
    unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
                                       ArmnnType);
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    // Construct the expected output data. Note that unlike SimpleConvolution2dTestImpl,
    // this helper does not apply the bias to the expected output; callers in this file
    // pass an empty bias.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    LayerTestResult<T, 4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    armnn::Convolution2dQueueDescriptor data;

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;

    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // I don't anticipate this being particularly slow, given that convolution is
    // implemented as a matrix multiplication, at which point dimensionality doesn't matter.
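    // For example, the length-5 input below becomes a 5x1 image and the length-3
    // kernel a 3x1 kernel; padding and stride then apply along the former 1D axis only.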

    unsigned int batchSize = 1;
    unsigned int inputChannels = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize = 3;
    unsigned int padSize = 2;
    unsigned int stride = 1;
    unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride = (5 + 4 - 3 + 1) / 1.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
             5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f - 3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
                  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
                  1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
    armnn::ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    // Output
    LayerTestResult<T,4> ret(outputInfo);
    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
    return ret;
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    // Use a single-batch 1-channel 3x4 input image (NHWC).
    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    });

    // Use a single 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is 1 batch of a 1-channel 3x4 image.
    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);

    const std::vector<float> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    });

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);

    const std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX = 2;
    uint32_t strideY = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11, 21, 31,
            12, 22, 32,
            13, 23, 33
        },
        qScale, qOffset)));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11, -21,
            -12, -22,
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 1-channel 6x8 image.
    // Manually calculated like this:
    //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
    //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
    //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
    //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
    //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
    //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
    //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            0, 0, 0, 0, 0, 0,
            -242, -594, -934, -372, 0, 0,
            -495, -1190, -1850, -725, 0, 0,
            -538, -1256, -1916, -748, 0, 0,
            -273, -626, -946, -363, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1, // Padding left.
        2, // Padding top.
        3, // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11, 21, 31, 41, 51,
            12, 22, 32, 42, 52,
            13, 23, 33, 43, 53,
            14, 24, 34, 44, 54,
            15, 25, 35, 45, 55,
        }, qScale, qOffset)));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11, -21, -31, -41,
            -12, -22, -32, -42,
            -13, -23, -33, -43,
            -14, -24, -34, -44,
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -7140, -10580, -13940, -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144, -9318, -5152,
            -5032, -7256, -9376, -6142, -3368,
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1, // Padding left.
        1, // Padding top.
        2, // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QuantisedAsymm8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QuantisedSymm16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
                                                                    inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset())));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
                                                                     kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset())));
    auto expectedOutput =
        MakeTensor<T, 4>(outputTensorInfo,
                         std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                                           outputTensorInfo.GetQuantizationScale(),
                                                           outputTensorInfo.GetQuantizationOffset())));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, this dilates the kernel to an effective 7x7,
    // so the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1.
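    // (Dilated kernel size: d * (K - 1) + 1 = 3 * (3 - 1) + 1 = 7.)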
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, 0, 0, 0, // Default padding, passed explicitly so that biasEnabled binds to the final (bias) parameter.
        1, 1,       // Default strides.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, this dilates the kernel to an effective 7x7,
    // so the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        6., 4., 4., 4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, 0, 0, 0, // Default padding, passed explicitly so that biasEnabled binds to the final (bias) parameter.
        1, 1,       // Default strides.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // The dilation rate of 2 dilates the 2x2 kernel to an effective 3x3: d * (K - 1) + 1 = 2 * (2 - 1) + 1 = 3.
    // With padding of 1 on each side and a stride of 3, the output is 4x4:
    // floor((I + padLeft + padRight - Kd) / S) + 1 = floor((10 + 1 + 1 - 3) / 3) + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4, 7, 7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2, 3, 3, 1
    };
    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        2,
        2,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        3,
        3,
        biasEnabled);
}

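// Runs the same convolution on the backend under test and on a reference workload
// factory; the caller compares ret.output (backend) against ret.outputExpected (reference).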
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> CompareConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int inputHeight = 8;
    unsigned int inputWidth = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX = 1;
    unsigned int padY = 1;

    unsigned int outputNum = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
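    // These are the standard output-size formulas floor((I + 2*P - K) / S) + 1,
    // rewritten for unsigned integer division.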

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
    unsigned int biasShape[] = {outputChannels};

    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
    kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
    biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);

    LayerTestResult<T,4> ret(outputTensorInfo);

    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
    auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);

    armnn::Convolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}
1368
1369//
1370// DepthwiseConvolution2d implementations
1371//
1372
1373template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1374 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1375LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1376 armnn::IWorkloadFactory& workloadFactory,
1377 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1378 const boost::multi_array<T, 4>& input,
1379 const boost::multi_array<T, 4>& kernel,
1380 const boost::multi_array<B, 1>& bias,
1381 const boost::multi_array<T, 4>& outputExpected,
1382 float qScale,
1383 int32_t qOffset,
1384 const armnn::DataLayout layout,
1385 uint32_t padLeft = 0,
1386 uint32_t padTop = 0,
1387 uint32_t padRight = 0,
1388 uint32_t padBottom = 0,
1389 uint32_t strideX = 1,
1390 uint32_t strideY = 1)
1391{
1392 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1393 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1394 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1395 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1396 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1397 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1398 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1399 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1400 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1401 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1402 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1403 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1404
1405 // If a bias is used, its size must equal the number of output channels.
1406 bool biasEnabled = bias.size() > 0;
1407 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1408
1409 // Creates the tensors.
1410 armnn::TensorInfo inputTensorInfo =
1411 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1412 armnn::TensorInfo outputTensorInfo =
1413 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1414 armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1415 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1416
1417 // Set quantization parameters if the requested type is a quantized type.
1418 if (armnn::IsQuantizedType<T>())
1419 {
1420 inputTensorInfo.SetQuantizationScale(qScale);
1421 inputTensorInfo.SetQuantizationOffset(qOffset);
1422 outputTensorInfo.SetQuantizationScale(qScale);
1423 outputTensorInfo.SetQuantizationOffset(qOffset);
1424 kernelDesc.SetQuantizationScale(qScale);
1425 kernelDesc.SetQuantizationOffset(qOffset);
1426 biasDesc.SetQuantizationScale(qScale*qScale);
1427 biasDesc.SetQuantizationOffset(0);
1428 }
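    // The bias scale follows the usual quantization convention biasScale = inputScale * weightScale
    // (both are qScale here, hence qScale * qScale) with a zero offset, so the int32 bias value can
    // be accumulated directly onto the products of quantized input and weight values.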
1429
1430 // Construct the input data.
1431 std::vector<T> inputData;
1432 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1433
1434     // If the requested layout is NHWC, permute the input data into that layout.
1435 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1436 if (layout == armnn::DataLayout::NHWC)
1437 {
1438 std::vector<T> tmp(inputData.size());
1439 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1440 inputData = tmp;
1441 }
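    // Note on the PermutationVector semantics: each entry gives the destination index of that
    // source dimension, so { 0, 3, 1, 2 } sends N(0)->0, C(1)->3, H(2)->1, W(3)->2 and thereby
    // rearranges NCHW data into NHWC order, e.g. shape [1, 2, 5, 5] becomes [1, 5, 5, 2].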
1442
1443 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1444
1445 // Construct the output data, with bias applied, as appropriate.
1446 std::vector<T> outputData;
1447 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1448 if (biasEnabled)
1449 {
1450 std::vector<T> biasV;
1451 biasV.assign(bias.data(), bias.data() + outputChannels);
1452 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1453 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1454 outputWidth, outputHeight);
1455 }
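    // ApplyBias folds the bias into the expected output: each value is dequantized, the
    // per-channel bias is added at every spatial position, and the result is requantized
    // with the output scale/offset - mirroring what the workload under test must compute.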
1456
1457 LayerTestResult<T, 4> ret(outputTensorInfo);
1458
1459     // If the requested layout is NHWC, permute the expected output into that layout.
1460 if (layout == armnn::DataLayout::NHWC)
1461 {
1462 std::vector<T> tmp(outputData.size());
1463 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1464 outputData = tmp;
1465 }
1466
1467 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1468
1469 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1470 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1471
1472 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1473
1474 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1475
1476 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1477 if (biasEnabled)
1478 {
1479 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1480 }
1481
1482 armnn::DepthwiseConvolution2dQueueDescriptor data;
1483 data.m_Weight = &weightsTensor;
1484     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset can be a source of bugs.
1485 data.m_Parameters.m_StrideX = strideX;
1486 data.m_Parameters.m_StrideY = strideY;
1487 data.m_Parameters.m_PadLeft = padLeft;
1488 data.m_Parameters.m_PadRight = padRight;
1489 data.m_Parameters.m_PadTop = padTop;
1490 data.m_Parameters.m_PadBottom = padBottom;
1491 data.m_Parameters.m_BiasEnabled = biasEnabled;
1492 data.m_Parameters.m_DataLayout = layout;
1493
1494 armnn::WorkloadInfo info;
1495 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1496 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1497
1498 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1499 inputHandle->Allocate();
1500 outputHandle->Allocate();
1501
1502 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1503
1504 ExecuteWorkload(*workload, memoryManager);
1505
1506 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1507
1508 return ret;
1509}
1510
1511template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1512LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1513 armnn::IWorkloadFactory& workloadFactory,
1514 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1515 float qScale,
1516 int32_t qOffset,
1517 bool biasEnabled,
1518 const armnn::DataLayout layout)
1519{
1520 using B = armnn::ResolveType<ArmnnBType>;
1521
1522 unsigned int inputHeight = 3;
1523 unsigned int inputWidth = 3;
1524 unsigned int inputChannels = 2;
1525 unsigned int inputNum = 1;
1526
1527 unsigned int kernelHeight = 3;
1528 unsigned int kernelWidth = 3;
1529 unsigned int kernelChannels = inputChannels;
1530 unsigned int kernelDepthMultiplier = 1;
1531
1532 unsigned int outputHeight = 1;
1533 unsigned int outputWidth = 1;
1534 unsigned int outputChannels = kernelChannels;
1535 unsigned int outputNum = inputNum;
1536
1537 armnn::TensorInfo inputTensorInfo =
1538 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1539 armnn::TensorInfo outputTensorInfo =
1540 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1541 armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1542 ArmnnType);
1543 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1544
1545 // Set quantization parameters if the requested type is a quantized type.
1546 if(armnn::IsQuantizedType<T>())
1547 {
1548 inputTensorInfo.SetQuantizationScale(qScale);
1549 inputTensorInfo.SetQuantizationOffset(qOffset);
1550 outputTensorInfo.SetQuantizationScale(qScale);
1551 outputTensorInfo.SetQuantizationOffset(qOffset);
1552 kernelDesc.SetQuantizationScale(qScale);
1553 kernelDesc.SetQuantizationOffset(qOffset);
1554 biasDesc.SetQuantizationScale(qScale*qScale);
1555 biasDesc.SetQuantizationOffset(0);
1556 }
1557 std::vector<T> inputData = std::vector<T>(
1558         QuantizedVector<T>({
1559 1.f, 2.f, 1.f,
1560 2.f, 1.f, 2.f,
1561 1.f, 2.f, 1.f,
1562
1563             1.f, 2.f, 1.f,
1564 2.f, 1.f, 2.f,
1565 1.f, 2.f, 1.f,
1566 },
1567 inputTensorInfo.GetQuantizationScale(),
1568 inputTensorInfo.GetQuantizationOffset()));
1569
1570     // If the requested layout is NHWC, permute the input data into that layout.
1571 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1572 if (layout == armnn::DataLayout::NHWC)
1573 {
1574 std::vector<T> tmp(inputData.size());
1575 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1576 inputData = tmp;
1577 }
1578 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1579
1580     std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1581 biasDesc.GetQuantizationScale(),
1582 biasDesc.GetQuantizationOffset()));
1583
1584     auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1585
1586 std::vector<T> kernelData = std::vector<T>(
1587         QuantizedVector<T>({
1588 1.f, 0.f, 1.f,
1589 0.f, 0.f, 0.f,
1590 -1.f, 0.f, -1.f,
1591
1592              1.f, 0.f,  1.f,
1593 0.f, 0.f, 0.f,
1594 -1.f, 0.f, -1.f,
1595 },
1596 kernelDesc.GetQuantizationScale(),
1597 kernelDesc.GetQuantizationOffset()));
1598
1599     auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1600
1601 // Manually calculated.
1602 std::vector<T> outputImage(
1603         QuantizedVector<T>({ 0.f, 0.f },
1604 outputTensorInfo.GetQuantizationScale(),
1605 outputTensorInfo.GetQuantizationOffset())
1606     );
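    // The zeros are expected because the +1/-1 kernel rows cancel over the symmetric input:
    // per channel, 1*1 + 1*1 + (-1)*1 + (-1)*1 = 0, and the middle kernel row is all zeros.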
1607
1608 // Optionally apply bias to output image.
1609 if(biasEnabled)
1610 {
1611 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1612 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1613 outputWidth, outputHeight);
1614 }
1615
1616 LayerTestResult<T, 4> ret(outputTensorInfo);
1617 if (layout == armnn::DataLayout::NHWC)
1618 {
1619 std::vector<T> tmp(outputImage.size());
1620 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1621 outputImage = tmp;
1622 }
1623
1624 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1625
1626 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1627 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1628
1629 armnn::DepthwiseConvolution2dQueueDescriptor data;
1630 armnn::WorkloadInfo info;
1631 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1632 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1633
1634 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1635 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1636
1637 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1638 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1639
1640 data.m_Weight = &weightsTensor;
1641 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1642 data.m_Parameters.m_StrideX = 1;
1643 data.m_Parameters.m_StrideY = 1;
1644 data.m_Parameters.m_PadLeft = 0;
1645 data.m_Parameters.m_PadRight = 0;
1646 data.m_Parameters.m_PadTop = 0;
1647 data.m_Parameters.m_PadBottom = 0;
1648 data.m_Parameters.m_BiasEnabled = biasEnabled;
1649 data.m_Parameters.m_DataLayout = layout;
1650
1651 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1652 inputHandle->Allocate();
1653 outputHandle->Allocate();
1654
1655 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1656
1657 ExecuteWorkload(*workload, memoryManager);
1658
1659 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1660
1661 return ret;
1662}
1663
1664template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1665LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1666 armnn::IWorkloadFactory& workloadFactory,
1667 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1668 float qScale,
1669 int32_t qOffset,
1670 bool biasEnabled,
1671 const armnn::DataLayout layout)
1672{
1673 using B = armnn::ResolveType<ArmnnBType>;
1674
1675 unsigned int depthMultiplier = 2;
1676
1677 unsigned int inputHeight = 8;
1678 unsigned int inputWidth = 16;
1679 unsigned int inputChannels = 2;
1680 unsigned int inputBatchSize = 1;
1681
1682 unsigned int kernelHeight = 5;
1683 unsigned int kernelWidth = 3;
1684
1685 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1686 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1687 unsigned int outputChannels = inputChannels * depthMultiplier;
1688 unsigned int outputBatchSize = inputBatchSize;
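    // These dimensions bake in the parameters set on the descriptor further down
    // (padTop = padBottom = 1, strideY = 1, strideX = 2, no horizontal padding):
    //   outputHeight = (8 + 1 + 1 - 5)/1 + 1 = 6,  outputWidth = (16 - 3)/2 + 1 = 7,
    // and each input channel produces depthMultiplier (= 2) output channels.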
1689
1690 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1691 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1692 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1693 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1694 armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1695 ArmnnType);
1696 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1697
1698 // Set quantization parameters if the requested type is a quantized type.
1699 if(armnn::IsQuantizedType<T>())
1700 {
1701 inputTensorInfo.SetQuantizationScale(qScale);
1702 inputTensorInfo.SetQuantizationOffset(qOffset);
1703 outputTensorInfo.SetQuantizationScale(qScale);
1704 outputTensorInfo.SetQuantizationOffset(qOffset);
1705 kernelDesc.SetQuantizationScale(qScale);
1706 kernelDesc.SetQuantizationOffset(qOffset);
1707 biasDesc.SetQuantizationScale(qScale*qScale);
1708 biasDesc.SetQuantizationOffset(0);
1709 }
1710
1711 // NOTE: originalInputData is in NCHW format
1712 std::vector<T> originalInputData = std::vector<T>(
1713         QuantizedVector<T>({
1714 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1715 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1716 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1717 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1718 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1719 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1720 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1721 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1722 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1723 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1724 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1725 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1726 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1727 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1728 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1729 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1730 },
1731 inputTensorInfo.GetQuantizationScale(),
1732 inputTensorInfo.GetQuantizationOffset()));
1733
1734     std::vector<T> inputData = originalInputData;
1735     // If the requested layout is NHWC, permute the input data into that layout.
1736 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1737 if (layout == armnn::DataLayout::NHWC)
1738 {
1739 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1740 originalInputData.data(), inputData.data(), sizeof(T));
1741 }
1742 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1743
1744     std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1745 biasDesc.GetQuantizationScale(),
1746 biasDesc.GetQuantizationOffset());
1747
1748     auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1749
1750 std::vector<T> kernelData = std::vector<T>(
1751         QuantizedVector<T>({
1752 1, 1, 1,
1753 1, -1, 1,
1754 1, 1, 1,
1755 1, 1, 1,
1756 1, 1, 1,
1757
1758             2, 2, 2,
1759 2, 2, 2,
1760 2, 2, 2,
1761 2, 2, 2,
1762 2, 2, 2,
1763
1764             0, 0, 0,
1765 0, -1, 0,
1766 0, 0, 0,
1767 0, 0, 0,
1768 0, 0, 0,
1769
1770             0, 0, 0,
1771 0, 0, 0,
1772 0, 1, 0,
1773 0, 0, 0,
1774 0, 0, 0
1775 },
1776 kernelDesc.GetQuantizationScale(),
1777 kernelDesc.GetQuantizationOffset()));
1778
1779     auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1780
1781 // Manually calculated.
1782 std::vector<T> originalOutputImage = std::vector<T>(
1783         QuantizedVector<T>({
1784 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1785 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1786 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1787 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1788 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1789 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1790
1791 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1792              0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
1793             -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1794 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1795 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1796 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1797
1798              8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1799             10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1800 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1801 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1802 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1803              8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1804
1805             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1806 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1811 },
1812 outputTensorInfo.GetQuantizationScale(),
1813 outputTensorInfo.GetQuantizationOffset()));
1814
1815 // Optionally apply bias to output image.
1816 if(biasEnabled)
1817 {
1818 ApplyBias(originalOutputImage,
1819 outputTensorInfo.GetQuantizationScale(),
1820 outputTensorInfo.GetQuantizationOffset(),
1821 biasV,
1822 biasDesc.GetQuantizationScale(),
1823 biasDesc.GetQuantizationOffset(),
1824 outputWidth,
1825 outputHeight);
1826 }
1827
1828 LayerTestResult<T, 4> ret(outputTensorInfo);
1829 std::vector<T> outputImage = originalOutputImage;
1830 if (layout == armnn::DataLayout::NHWC)
1831 {
1832 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1833 originalOutputImage.data(), outputImage.data(), sizeof(T));
1834 }
1835
1836 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1837
1838 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1839 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1840
1841 armnn::DepthwiseConvolution2dQueueDescriptor data;
1842 armnn::WorkloadInfo info;
1843 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1844 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1845
1846 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1847 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1848
1849 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1850 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1851
1852 data.m_Weight = &weightsTensor;
1853 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1854 data.m_Parameters.m_StrideX = 2;
1855 data.m_Parameters.m_StrideY = 1;
1856 data.m_Parameters.m_PadLeft = 0;
1857 data.m_Parameters.m_PadRight = 0;
1858 data.m_Parameters.m_PadTop = 1;
1859 data.m_Parameters.m_PadBottom = 1;
1860 data.m_Parameters.m_BiasEnabled = biasEnabled;
1861 data.m_Parameters.m_DataLayout = layout;
1862
1863 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1864 inputHandle->Allocate();
1865 outputHandle->Allocate();
1866
1867 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1868
1869 ExecuteWorkload(*workload, memoryManager);
1870
1871 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1872
1873 return ret;
1874}
1875
1876template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1877 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1878LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1879 armnn::IWorkloadFactory& workloadFactory,
1880 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1881 const boost::multi_array<T, 4>& originalInput,
1882 const boost::multi_array<T, 4>& originalKernel,
1883 const boost::multi_array<B, 1>& bias,
1884 const boost::multi_array<T, 4>& originalOutputExpected,
1885 float qScale,
1886 int32_t qOffset,
1887 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1888 uint32_t padLeft = 0,
1889 uint32_t padTop = 0,
1890 uint32_t padRight = 0,
1891 uint32_t padBottom = 0,
1892 uint32_t strideX = 1,
1893 uint32_t strideY = 1,
1894 uint32_t dilationX = 1,
1895 uint32_t dilationY = 1)
1896{
1897 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1898 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1899 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1900 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1901
1902 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1903 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1904 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1905 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1906
1907 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1908 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1909 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1910 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1911
1912 bool biasEnabled = bias.size() > 0;
1913
1914 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1915 BOOST_ASSERT(inputNum == 1);
1916 BOOST_ASSERT(outputNum == 1);
1917
1918 // If a bias is used, its size must equal the number of output channels.
1919 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1920
1921
1922 // Note these tensors will use two (identical) batches.
1923 armnn::TensorInfo inputTensorInfo =
1924 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1925 armnn::TensorInfo outputTensorInfo =
1926 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1927
1928     // For depthwise convolution the kernel is always given in [M, C, H, W] (NCHW-style) order, regardless of the input/output data layout.
1929 armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1930
1931 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1932
1933 // Set quantization parameters if the requested type is a quantized type.
1934 if(armnn::IsQuantizedType<T>())
1935 {
1936 inputTensorInfo.SetQuantizationScale(qScale);
1937 inputTensorInfo.SetQuantizationOffset(qOffset);
1938 outputTensorInfo.SetQuantizationScale(qScale);
1939 outputTensorInfo.SetQuantizationOffset(qOffset);
1940 kernelDesc.SetQuantizationScale(qScale);
1941 kernelDesc.SetQuantizationOffset(qOffset);
1942 biasDesc.SetQuantizationScale(qScale*qScale);
1943 biasDesc.SetQuantizationOffset(0);
1944 }
1945
1946 LayerTestResult<T, 4> ret(outputTensorInfo);
1947
1948 // Construct input data
1949 std::vector<T> input;
1950 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1951 std::vector<T> inputData;
1952 inputData.insert(inputData.end(), input.begin(), input.end());
1953 inputData.insert(inputData.end(), input.begin(), input.end());
1954
1955     // If the requested layout is NHWC, permute the input data into that layout.
1956 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1957 if (layout == armnn::DataLayout::NHWC)
1958 {
1959 std::vector<T> tmp(inputData.size());
1960 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1961 inputData = tmp;
1962 }
1963
1964 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1965
1966 std::vector<T> output;
1967 output.assign(originalOutputExpected.data(),
1968 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1969
1970 // Apply bias to output data if it is enabled.
1971 if(biasEnabled)
1972 {
1973 std::vector<T> biasV;
1974 biasV.assign(bias.data(), bias.data() + outputChannels);
1975 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1976 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1977 outputWidth, outputHeight);
1978 }
1979
1980 // Construct expected output data
1981 std::vector<T> outputData;
1982 outputData.insert(outputData.end(), output.begin(), output.end());
1983 outputData.insert(outputData.end(), output.begin(), output.end());
1984
1985     // If the requested layout is NHWC, permute the expected output into that layout.
1986 if (layout == armnn::DataLayout::NHWC)
1987 {
1988 std::vector<T> tmp(outputData.size());
1989 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1990 outputData = tmp;
1991 }
1992 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1993
1994 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1995 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1996
1997 armnn::DepthwiseConvolution2dQueueDescriptor data;
1998 armnn::WorkloadInfo info;
1999 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2000 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2001
2002 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2003 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2004
2005 if(biasEnabled)
2006 {
2007 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2008 }
2009
2010 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2011 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2012
2013 data.m_Weight = &weightsTensor;
2014     data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset can be a source of bugs.
2015 data.m_Parameters.m_StrideX = strideX;
2016 data.m_Parameters.m_StrideY = strideY;
2017 data.m_Parameters.m_PadLeft = padLeft;
2018 data.m_Parameters.m_PadRight = padRight;
2019 data.m_Parameters.m_PadTop = padTop;
2020 data.m_Parameters.m_PadBottom = padBottom;
2021 data.m_Parameters.m_BiasEnabled = biasEnabled;
2022 data.m_Parameters.m_DataLayout = layout;
2023 data.m_Parameters.m_DilationX = dilationX;
2024 data.m_Parameters.m_DilationY = dilationY;
2025
2026 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2027 inputHandle->Allocate();
2028 outputHandle->Allocate();
2029
2030 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2031
2032 ExecuteWorkload(*workload, memoryManager);
2033
2034 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2035
2036 return ret;
2037}
2038
2039template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2040 typename T = armnn::ResolveType<ArmnnType>>
2041LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2042 armnn::IWorkloadFactory& workloadFactory,
2043 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2044 float qScale,
2045 int32_t qOffset,
2046 bool biasEnabled,
2047 const armnn::DataLayout layout)
2048{
2049 // Use a single-batch 2-channel 5x5 image as input.
2050 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2051 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2052         QuantizedVector<T>({
2053             0, 1, 2, 3, 4,
2054 5, 6, 7, 8, 9,
2055 10, 11, 12, 13, 14,
2056 15, 16, 17, 18, 19,
2057 20, 21, 22, 23, 24,
2058
2059 25, 26, 27, 28, 29,
2060 30, 31, 32, 33, 34,
2061 35, 36, 37, 38, 39,
2062 40, 41, 42, 43, 44,
2063 45, 46, 47, 48, 49
2064         },
2065         inputTensorInfo.GetQuantizationScale(),
2066         inputTensorInfo.GetQuantizationOffset())));
2067
2068 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2069 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2070 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2071         QuantizedVector<T>({
2072             32, 31, 30, 29,
2073 28, 27, 26, 25,
2074 24, 23, 22, 21,
2075 20, 19, 18, 17,
2076
2077 16, 15, 14, 13,
2078 12, 11, 10, 9,
2079 8, 7, 6, 5,
2080 4, 3, 2, 1
2081         },
2082         kernelTensorInfo.GetQuantizationScale(),
2083         kernelTensorInfo.GetQuantizationOffset())));
2084
2085 // Expected output is 1 batch of a 2-channel 5x5 image.
2086     // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
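    // With the asymmetric padding used below (left/top = 1, right/bottom = 2), stride 1 and a
    // 4x4 kernel, each spatial output dimension is (5 + 1 + 2 - 4)/1 + 1 = 5, matching the input.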
2087 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2088 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2089         QuantizedVector<T>({
2090             1062, 1580, 1850, 1530, 1117,
2091 2140, 3108, 3500, 2842, 2042,
2092 3580, 5068, 5460, 4342, 3062,
2093 3618, 5072, 5390, 4248, 2971,
2094 3074, 4282, 4510, 3533, 2457,
2095
2096 1550, 2284, 2362, 1955, 1428,
2097 2910, 4206, 4342, 3528, 2536,
2098 3390, 4886, 5022, 4068, 2916,
2099 3566, 5056, 5182, 4133, 2922,
2100 3100, 4352, 4452, 3517, 2465
2101         },
2102         outputTensorInfo.GetQuantizationScale(),
2103         outputTensorInfo.GetQuantizationOffset())));
2104
2105 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2106 workloadFactory,
2107 memoryManager,
2108 input,
2109 kernel,
2110 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2111 expectedOutput,
2112 qScale,
2113 qOffset,
2114 layout,
2115 1, // Padding left.
2116 1, // Padding top.
2117 2, // Padding right.
2118 2, // Padding bottom.
2119 1, // strideX
2120 1); // strideY
2121}
2122
2123template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2124 typename T = armnn::ResolveType<ArmnnType>>
2125LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2126 armnn::IWorkloadFactory& workloadFactory,
2127 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2128 float qScale,
2129 int32_t qOffset,
2130 bool biasEnabled)
2131{
2132 auto layout = armnn::DataLayout::NHWC;
2133
2134 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2135 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2136         QuantizedVector<T>({
2137             0, 1, 2, 3, 4,
2138 5, 6, 7, 8, 9,
2139 10, 11, 12, 13, 14,
2140 15, 16, 17, 18, 19,
2141 20, 21, 22, 23, 24,
2142
2143 25, 26, 27, 28, 29,
2144 30, 31, 32, 33, 34,
2145 35, 36, 37, 38, 39,
2146 40, 41, 42, 43, 44,
2147 45, 46, 47, 48, 49
2148         },
2149         inputTensorInfo.GetQuantizationScale(),
2150         inputTensorInfo.GetQuantizationOffset())));
2151
2152 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2153 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2154         QuantizedVector<T>({
2155             32, 31, 30, 29,
2156 28, 27, 26, 25,
2157 24, 23, 22, 21,
2158 20, 19, 18, 17,
2159
2160 16, 15, 14, 13,
2161 12, 11, 10, 9,
2162 8, 7, 6, 5,
2163 4, 3, 2, 1
2164         },
2165         kernelTensorInfo.GetQuantizationScale(),
2166         kernelTensorInfo.GetQuantizationOffset())));
2167
2168 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2169 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2170         QuantizedVector<T>({
2171             1062, 1580, 1850, 1530, 1117,
2172 2140, 3108, 3500, 2842, 2042,
2173 3580, 5068, 5460, 4342, 3062,
2174 3618, 5072, 5390, 4248, 2971,
2175 3074, 4282, 4510, 3533, 2457,
2176
2177 1550, 2284, 2362, 1955, 1428,
2178 2910, 4206, 4342, 3528, 2536,
2179 3390, 4886, 5022, 4068, 2916,
2180 3566, 5056, 5182, 4133, 2922,
2181 3100, 4352, 4452, 3517, 2465
2182         },
2183         outputTensorInfo.GetQuantizationScale(),
2184         outputTensorInfo.GetQuantizationOffset())));
2185
2186 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2187 workloadFactory,
2188 memoryManager,
2189 input,
2190 kernel,
2191 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2192 expectedOutput,
2193 qScale,
2194 qOffset,
2195 layout,
2196 1, // Padding left.
2197 1, // Padding top.
2198 2, // Padding right.
2199 2, // Padding bottom.
2200 1, // strideX
2201 1); // strideY
2202}
2203
2204template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2205 typename T = armnn::ResolveType<ArmnnType>>
2206LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2207 armnn::IWorkloadFactory& workloadFactory,
2208 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2209 float qScale,
2210 int32_t qOffset,
2211 bool biasEnabled)
2212{
2213 auto layout = armnn::DataLayout::NHWC;
2214
2215 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2216 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2217         QuantizedVector<T>({
2218 0, 0, 0, 0, 0, 0, 0, 0, 0,
2219 0, 0, 0, 0, 0, 0, 0, 0, 0,
2220 0, 0, 0, 0, 0, 0, 0, 0, 0,
2221 0, 0, 0, 1, 1, 1, 0, 0, 0,
2222 0, 0, 0, 1, 1, 1, 0, 0, 0,
2223 0, 0, 0, 1, 1, 1, 0, 0, 0,
2224 0, 0, 0, 0, 0, 0, 0, 0, 0,
2225 0, 0, 0, 0, 0, 0, 0, 0, 0,
2226 0, 0, 0, 0, 0, 0, 0, 0, 0
2227 },
2228 inputTensorInfo.GetQuantizationScale(),
2229 inputTensorInfo.GetQuantizationOffset())));
2230
2231 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2232 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2233         QuantizedVector<T>({
2234 1, 2, 3,
2235 4, 5, 6,
2236 7, 8, 9
2237 },
2238 kernelTensorInfo.GetQuantizationScale(),
2239 kernelTensorInfo.GetQuantizationOffset())));
2240
2241 uint32_t padLeft = 0;
2242 uint32_t padTop = 0;
2243 uint32_t padRight = 0;
2244 uint32_t padBottom = 0;
2245 uint32_t strideX = 1;
2246 uint32_t strideY = 1;
2247 uint32_t dilationX = 3;
2248 uint32_t dilationY = 3;
2249
2250     // With dilation 3 the 3x3 kernel has an effective size of 7x7 (3 + (3-1)*(3-1) = 7), so the output shrinks from 9x9 to 3x3 of all 5s: (9 - 7)/1 + 1 = 3.
2251 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2252 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2253         QuantizedVector<T>({
2254 5, 5, 5,
2255 5, 5, 5,
2256 5, 5, 5
2257 },
2258 outputTensorInfo.GetQuantizationScale(),
2259 outputTensorInfo.GetQuantizationOffset())));
2260
2261 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2262 workloadFactory,
2263 memoryManager,
2264 input,
2265 kernel,
2266 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2267 expectedOutput,
2268 qScale,
2269 qOffset,
2270 layout,
2271 padLeft,
2272 padTop,
2273 padRight,
2274 padBottom,
2275 strideX,
2276 strideY,
2277 dilationX,
2278 dilationY);
2279}
2280
2281template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2282LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2283 armnn::IWorkloadFactory& workloadFactory,
2284 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2285 const std::vector<float>& inputNoQuantizedValues,
2286 armnn::TensorInfo& inputTensorInfo,
2287 const std::vector<float>& kernelNoQuantizedValues,
2288 armnn::TensorInfo& kernelTensorInfo,
2289 const std::vector<float>& outputExpectedNoQuantizedValues,
2290 armnn::TensorInfo& outputTensorInfo,
2291 uint32_t dilationX,
2292 uint32_t dilationY,
2293 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2294 bool biasEnabled = false)
2295{
2296 float qScale;
2297 int32_t qOffset;
2298 switch (ArmnnType)
2299 {
2300 case armnn::DataType::QuantisedAsymm8:
2301 {
2302 qScale = 0.1f;
2303 qOffset = 128;
2304 break;
2305 }
2306 case armnn::DataType::QuantisedSymm16:
2307 {
2308 qScale = 0.1f;
2309 qOffset = 0;
2310 break;
2311 }
2312 case armnn::DataType::Float32:
2313 default:
2314 {
2315 qScale = 0.f;
2316 qOffset = 0;
2317 break;
2318 }
2319 }
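    // The per-type choices above follow the usual conventions: QuantisedAsymm8 presumably uses an
    // offset of 128 to centre the 8-bit range, QuantisedSymm16 is symmetric so its offset must be 0,
    // and for Float32 the quantization parameters are unused.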
2320
2321 inputTensorInfo.SetQuantizationScale(qScale);
2322 inputTensorInfo.SetQuantizationOffset(qOffset);
2323 kernelTensorInfo.SetQuantizationScale(qScale);
2324 kernelTensorInfo.SetQuantizationOffset(qOffset);
2325 outputTensorInfo.SetQuantizationScale(qScale);
2326 outputTensorInfo.SetQuantizationOffset(qOffset);
2327
2328 auto input = MakeTensor<T, 4>(inputTensorInfo,
2329                                   std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2330 inputTensorInfo.GetQuantizationScale(),
2331 inputTensorInfo.GetQuantizationOffset())));
2332     auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2333                                    std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2334 kernelTensorInfo.GetQuantizationScale(),
2335 kernelTensorInfo.GetQuantizationOffset())));
2336 auto expectedOutput =
2337 MakeTensor<T, 4>(outputTensorInfo,
2338 std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2339 outputTensorInfo.GetQuantizationScale(),
2340 outputTensorInfo.GetQuantizationOffset())));
2341
2342 uint32_t padLeft = 0;
2343 uint32_t padTop = 0;
2344 uint32_t padRight = 0;
2345 uint32_t padBottom = 0;
2346 uint32_t strideX = 1;
2347 uint32_t strideY = 1;
2348
2349 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2350 workloadFactory,
2351 memoryManager,
2352 input,
2353 kernel,
2354 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2355 expectedOutput,
2356 qScale,
2357 qOffset,
2358 layout,
2359 padLeft,
2360 padTop,
2361 padRight,
2362 padBottom,
2363 strideX,
2364 strideY,
2365 dilationX,
2366 dilationY);
2367}
2368
2369template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2370LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2371 armnn::IWorkloadFactory& workloadFactory,
2372 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2373 bool biasEnabled,
2374 const armnn::DataLayout layout)
2375{
2376 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2377 std::vector<float> inputNoQuantizedValues =
2378 {
2379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2382 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2383 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2384 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2389 };
2390
2391 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2392 std::vector<float> kernelNoQuantizedValues =
2393 {
2394 1, 2, 3,
2395 4, 5, 6,
2396 7, 8, 9
2397 };
2398
2399     // With dilation 3 the 3x3 kernel has an effective size of 7x7,
2400     // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2401 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2402 std::vector<float> outputExpectedNoQuantizedValues =
2403 {
2404 6., 5., 5., 5.,
2405 6., 5., 5., 5.,
2406 6., 5., 5., 5.,
2407 3., 2., 2., 2.
2408 };
2409
2410 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2411 workloadFactory,
2412 memoryManager,
2413 inputNoQuantizedValues,
2414 inputTensorInfo,
2415 kernelNoQuantizedValues,
2416 kernelTensorInfo,
2417 outputExpectedNoQuantizedValues,
2418 outputTensorInfo,
2419 3,
2420 3,
2421 layout,
2422 biasEnabled);
2423}
2424
2425template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2426LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2427 armnn::IWorkloadFactory& workloadFactory,
2428 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2429 bool biasEnabled,
2430 const armnn::DataLayout layout)
2431{
2432 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2433 std::vector<float> inputNoQuantizedValues =
2434 {
2435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2438 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2439 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2440 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2445
2446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2450 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2451 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2456 };
2457
2458 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2459 std::vector<float> kernelNoQuantizedValues =
2460 {
2461 1, 2, 3,
2462 4, 5, 6,
2463 7, 8, 9,
2464
2465 1, 2, 3,
2466 4, 5, 6,
2467 7, 8, 9
2468 };
2469
2470     // With dilation 3 the 3x3 kernel has an effective size of 7x7,
2471     // so the output is 2x4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2472 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2473 std::vector<float> outputExpectedNoQuantizedValues =
2474 {
2475 6., 5., 5., 5.,
2476 6., 5., 5., 5.,
2477 6., 5., 5., 5.,
2478 3., 2., 2., 2.,
2479
2480 6., 5., 5., 5.,
2481 6., 5., 5., 5.,
2482 6., 5., 5., 5.,
2483 3., 2., 2., 2.
2484 };
2485
2486 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2487 workloadFactory,
2488 memoryManager,
2489 inputNoQuantizedValues,
2490 inputTensorInfo,
2491 kernelNoQuantizedValues,
2492 kernelTensorInfo,
2493 outputExpectedNoQuantizedValues,
2494 outputTensorInfo,
2495 3,
2496 3,
2497 layout,
2498 biasEnabled);
2499}
2500
2501template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2502LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2503 armnn::IWorkloadFactory& workloadFactory,
2504 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2505 bool biasEnabled,
2506 const armnn::DataLayout layout)
2507{
2508 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2509 std::vector<float> inputNoQuantizedValues =
2510 {
2511 10.0, 10.0, 10.0,
2512 10.0, 10.0, 10.0,
2513 10.0, 10.0, 10.0,
2514
2515 21.0, 22.0, 23.0,
2516 24.0, 25.0, 26.0,
2517 27.0, 28.0, 29.0
2518 };
2519
2520 armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2521
2522 std::vector<float> kernelNoQuantizedValues =
2523 {
2524 0.25f, 0.25f,
2525 0.25f, 0.25f,
2526
2527 0.25f, 0.25f,
2528 0.25f, 0.25f,
2529
2530 0.0f , 0.0f,
2531 0.0f , 0.1f,
2532
2533 0.0f , 0.0f,
2534 0.0f , 0.1f,
2535
2536 0.2f , 0.0f,
2537 0.0f , 0.0f,
2538
2539 0.2f , 0.0f,
2540 0.0f , 0.0f,
2541
2542 0.0f , 0.3f,
2543 0.0f , 0.0f,
2544
2545 0.0f , 0.3f,
2546 0.0f , 0.0f
2547 };
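    // The kernel shape is [depthMultiplier = 4, inputChannels = 2, 2, 2], i.e. one 2x2 kernel per
    // (multiplier, channel) pair in multiplier-major order. Judging by the expected output below,
    // output channel c*M + m is produced by kernel (m, c); e.g. the first output channel is the
    // 0.25-filled kernel over the all-10s input channel: 0.25 * (10 + 10 + 10 + 10) = 10.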
2548
2549 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2550 std::vector<float> outputExpectedNoQuantizedValues =
2551 {
2552 10.f, 10.f,
2553 10.f, 10.f,
2554
2555 1.f, 1.f,
2556 1.f, 1.f,
2557
2558 2.f, 2.f,
2559 2.f, 2.f,
2560
2561 3.f, 3.f,
2562 3.f, 3.f,
2563
2564 23.f, 24.f,
2565 26.f, 27.f,
2566
2567 2.5f, 2.6000001f,
2568 2.8f, 2.9f,
2569
2570 4.2000003f, 4.4f,
2571 4.8f, 5.f,
2572
2573 6.6000004f, 6.9f,
2574 7.5000005f, 7.8f
2575 };
2576
2577
2578 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2579 workloadFactory,
2580 memoryManager,
2581 inputNoQuantizedValues,
2582 inputTensorInfo,
2583 kernelNoQuantizedValues,
2584 kernelTensorInfo,
2585 outputExpectedNoQuantizedValues,
2586 outputTensorInfo,
2587 1,
2588 1,
2589 layout,
2590 biasEnabled);
2591}
2592
2593template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2594LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2595 armnn::IWorkloadFactory& workloadFactory,
2596 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2597 bool biasEnabled,
2598 const armnn::DataLayout layout)
2599{
2600 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2601 std::vector<float> inputNoQuantizedValues =
2602 {
2603 10.0, 10.0, 10.0,
2604 10.0, 10.0, 10.0,
2605 10.0, 10.0, 10.0,
2606
2607 21.0, 22.0, 23.0,
2608 24.0, 25.0, 26.0,
2609 27.0, 28.0, 29.0
2610 };
2611
2612 armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2613
2614 std::vector<float> kernelNoQuantizedValues =
2615 {
2616 0.25f, 0.25f,
2617 0.25f, 0.25f,
2618
2619 0.2f , 0.0f,
2620 0.0f , 0.0f,
2621
2622 0.0f , 0.0f,
2623 0.0f , 0.1f,
2624
2625 0.0f , 0.3f,
2626 0.0f , 0.0f
2627
2628 };
2629
2630 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2631 std::vector<float> outputExpectedNoQuantizedValues =
2632 {
2633 10.f, 10.f,
2634 10.f, 10.f,
2635
2636 1.f, 1.f,
2637 1.f, 1.f,
2638
2639 4.2000003f, 4.4f,
2640 4.8f, 5.f,
2641
2642 6.6000004f, 6.9f,
2643 7.5000005f, 7.8f
2644 };
2645
2646
2647 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2648 workloadFactory,
2649 memoryManager,
2650 inputNoQuantizedValues,
2651 inputTensorInfo,
2652 kernelNoQuantizedValues,
2653 kernelTensorInfo,
2654 outputExpectedNoQuantizedValues,
2655 outputTensorInfo,
2656 1,
2657 1,
2658 layout,
2659 biasEnabled);
2660}
2661
2662template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2663LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2664 armnn::IWorkloadFactory& workloadFactory,
2665 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2666 armnn::IWorkloadFactory& refWorkloadFactory,
2667 const armnnUtils::DataLayoutIndexed& layout)
2668{
2669 unsigned int inputHeight = 8;
2670 unsigned int inputWidth = 16;
2671 unsigned int inputChannels = 3;
2672 unsigned int inputNum = 5;
2673
2674 unsigned int kernelHeight = 3;
2675 unsigned int kernelWidth = 3;
2676 unsigned int channelMultiplier = 1;
2677
2678 unsigned int strideX = 2;
2679 unsigned int strideY = 3;
2680 unsigned int padX = 1;
2681 unsigned int padY = 1;
2682
2683 unsigned int outputNum = inputNum;
2684 unsigned int outputChannels = inputChannels * channelMultiplier;
2685 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2686 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
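    // Standard convolution output sizing with the parameters above:
    //   outputHeight = (8 + 2*1 - 3 + 3)/3 = 3  (i.e. floor((8 + 2 - 3)/3) + 1)
    //   outputWidth  = (16 + 2*1 - 3 + 2)/2 = 8 (i.e. floor((16 + 2 - 3)/2) + 1)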
2687
2688 armnn::TensorInfo inputTensorInfo;
2689 armnn::TensorInfo outputTensorInfo;
2690 armnn::TensorInfo kernelDesc;
2691 armnn::TensorInfo biasDesc;
2692
2693
2694 std::vector<unsigned int> inputShape;
2695 std::vector<unsigned int> outputShape;
2696 std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2697 std::vector<unsigned int> biasShape{ outputChannels };
2698 switch (layout.GetDataLayout())
2699 {
2700 case armnn::DataLayout::NCHW:
2701 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2702 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2703 break;
2704         case armnn::DataLayout::NHWC:
2705 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2706 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2707 break;
2708 default:
2709 throw armnn::InvalidArgumentException("unknown data layout ["
2710 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2711 }
2712
2713 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2714 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2715 int32_t qOffset = 0;
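    // For quantized T the input and output scales differ (1.0 vs 2.0), presumably so that
    // requantization between them is exercised; a scale of 0 simply means "not quantized"
    // for the float path.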
2716
2717 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2718 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2719 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2720 biasDesc = armnn::TensorInfo(
2721 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2722
2723 LayerTestResult<T, 4> ret(outputTensorInfo);
2724
2725 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2726 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2727 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2728 biasDesc, 1028, 0.0f, 255.0f);
2729
2730 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2731 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2732
2733 armnn::DepthwiseConvolution2dQueueDescriptor data;
2734 armnn::WorkloadInfo info;
2735 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2736 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2737
2738 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2739 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2740
2741 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2742 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2743 data.m_Weight = &weightsTensor;
2744 data.m_Bias = &biasTensor;
2745 data.m_Parameters.m_StrideX = strideX;
2746 data.m_Parameters.m_StrideY = strideY;
2747 data.m_Parameters.m_PadLeft = padX;
2748 data.m_Parameters.m_PadRight = padX;
2749 data.m_Parameters.m_PadTop = padY;
2750 data.m_Parameters.m_PadBottom = padY;
2751 data.m_Parameters.m_BiasEnabled = true;
2752 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2753
2754 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2755 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2756
2757 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2758 armnn::WorkloadInfo refInfo = info;
2759 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2760 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2761
2762 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2763 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2764
2765 outputHandleRef->Allocate();
2766 inputHandleRef->Allocate();
2767
2768 inputHandle->Allocate();
2769 outputHandle->Allocate();
2770
2771 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2772 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2773
2774 ExecuteWorkload(*workload, memoryManager);
2775
2776 workloadRef->PostAllocationConfigure();
2777 workloadRef->Execute();
2778
2779 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2780 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2781
2782 return ret;
2783}
2784
2785//
2786// Explicit template specializations
2787//
2788
2789template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2790Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2791 armnn::IWorkloadFactory&,
2792 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2793 bool,
2794 armnn::DataLayout);
2795
2796template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2797Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2798 armnn::IWorkloadFactory&,
2799 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2800 bool,
2801 armnn::DataLayout);
2802
2803template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2804Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2805 armnn::IWorkloadFactory&,
2806 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2807 bool,
2808 armnn::DataLayout);
2809
2810template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2811Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2812 armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

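// The explicit instantiations above force the compiler to emit the templated
// dilation tests for every supported input type (Float32, QuantisedAsymm8 and
// QuantisedSymm16). The quantized variants pair with a Signed32 bias type,
// since the bias of a quantized convolution is accumulated in 32-bit integers.
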
//
// Implementation functions
//

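// Each function below is a thin, non-templated entry point for the backend
// unit tests: it forwards to the matching templated *TestCommon / *TestImpl
// helper, fixing the input/bias data types and, for quantized variants, the
// quantization scale and zero-point. A minimal call site might look like the
// sketch below (how the factory and memory manager are obtained is
// backend-specific; RefWorkloadFactory is just one possibility):
//
//     armnn::RefWorkloadFactory factory;
//     LayerTestResult<float, 4> result =
//         SimpleConvolution2d3x5Test(factory, nullptr, /*biasEnabled=*/true,
//                                    armnn::DataLayout::NCHW);
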
LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}
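
// Quantization note (illustrative): armnn::Quantize maps v to
// round(v / qScale) + qOffset, so with qScale 0.5 and qOffset 50 an input
// value of 0.5f is stored as round(0.5 / 0.5) + 50 = 51 in the uint8 tensor.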

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        armnn::DataLayout::NHWC);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
        <armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution1dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    // qScale 0.1 with qOffset 128 centres the uint8 range around zero, so the
    // quantized test data can represent negative values.
    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
}

LayerTestResult<float, 4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory);
}
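
// The Compare* tests do not check against hard-coded expected data; instead
// they run the same convolution on both the factory under test and a
// reference factory and verify that the two backends produce matching output.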

LayerTestResult<float, 4> DepthwiseConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    // Single-channel 2x2 input.
    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });

    // Replicate one 2x2 kernel 64 times, giving a depth multiplier of 64
    // (kernel shape [M, C, H, W] = [64, 1, 2, 2]).
    std::vector<float> kernelData;
    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
    for (unsigned int i = 0; i < 64; ++i)
    {
        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
    }
    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);

    // Each output channel is the dot product 1*1 + 2*(-1) + 3*(-1) + 4*1 = 0,
    // so all 64 output elements are expected to be zero.
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<float, 1>(),    // No bias.
        expectedOutput,
        0.f,
        0,
        armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        false);
}
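
// With 3x3 dilation, a 3x3 kernel samples an effective receptive field of
// kernelSize + (kernelSize - 1) * (dilation - 1) = 3 + 2 * 2 = 7 in each
// spatial dimension.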

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}