//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "Conv2dTestImpl.hpp"
7
8#include <DataLayoutIndexed.hpp>
9#include <Permute.hpp>
10#include <TensorUtils.hpp>
11
12#include <armnn/ArmNN.hpp>
13
14#include <backendsCommon/CpuTensorHandle.hpp>
15
16#include <backendsCommon/test/QuantizeHelper.hpp>
17#include <backendsCommon/test/TensorCopyUtils.hpp>
18#include <backendsCommon/test/WorkloadTestUtils.hpp>
19
20#include <test/TensorHelpers.hpp>
21
22#include <boost/numeric/conversion/cast.hpp>
23
24#include <string>
25
26//
27// Static data
28//
29
30// 2-channel bias used by a number of Conv2d tests.
31static std::vector<float> Bias2({0, 2});
32
33static std::vector<float> Bias4({1, 2, 3, 4});
34
35static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
36
37// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
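// Channel 0 is a field of 0.5s with a single all-zero row, channel 1 has a vertical line of 1s
// in column 2, and channel 2 is uniformly -1.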
38static std::vector<float> ConvInput3x8x16({
39 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
40 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
41 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
42 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
43 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
44 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
45 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
46 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
47 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
56 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
63});
64
65//
66// Helper templates
67//
68
69// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
70template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
71boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
72{
73 if(biasEnabled)
74 {
75 armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
76 boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2));
77 return bias;
78 }
79 else
80 {
81 return boost::multi_array<T, 1>();
82 }
83}
84
85// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
86template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
87boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
88{
89 if(biasEnabled)
90 {
91 armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
92 boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias4));
93 return bias;
94 }
95 else
96 {
97 return boost::multi_array<T, 1>();
98 }
99}
100
101// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
102template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
103boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
104{
105 if(biasEnabled)
106 {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
108 boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias8));
109 return bias;
110 }
111 else
112 {
113 return boost::multi_array<T, 1>();
114 }
115}
116
// Helper template that returns Bias2, Bias4 or Bias8 (selected by the number of output channels),
// or an empty vector if bias is disabled.
118template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
119boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
120{
121 const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
122 const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
123 const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
124
125 switch (outputChannels)
126 {
127 case 2:
128 default:
129 {
130 return GetBias2<ArmnnType>(biasEnabled, qScale);
131 }
132 case 4:
133 {
134 return GetBias4<ArmnnType>(biasEnabled, qScale);
135 }
136 case 8:
137 {
138 return GetBias8<ArmnnType>(biasEnabled, qScale);
139 }
140 }
141}
142
143//
144// Implementation templates
145//
146
147// Mapping from input type to bias type for fully connected layers.
148// float => float, uint8_t => int32_t
149template<typename T>
150struct FullyConnectedBiasTypeForInputType;
151
152template<>
153struct FullyConnectedBiasTypeForInputType<float>
154{
155 using Type = float;
156};
157
158template<>
159struct FullyConnectedBiasTypeForInputType<uint8_t>
160{
161 using Type = int32_t;
162};
163
164// Modifies a std::vector in-place using a specified bias.
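// The i-th bias value is broadcast over the i-th channel (a w x h block of v): each element is
// dequantized, the bias added, and the sum re-quantized, mirroring the backend's quantized arithmetic.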
165template<typename T, typename B>
166void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
167 const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
168{
169 BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
170 "Invalid type and parameter combination.");
171 BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
172 "Invalid type and parameter combination.");
173
174 // Note we need to dequantize and re-quantize the image value and the bias.
175 for (uint32_t i = 0; i < bias.size(); ++i)
176 {
177 float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
178 for (uint32_t y = 0; y < h; ++y)
179 {
180 for (uint32_t x = 0; x < w; ++x)
181 {
182 uint32_t offset = (i * h + y) * w + x;
183 BOOST_ASSERT(offset < v.size());
184 T& outRef = v[offset];
185 float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
186 outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
187 }
188 }
189 }
190}
191
192//
193// Convolution2d implementations
194//
195
196template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
197 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
198LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
199 armnn::IWorkloadFactory& workloadFactory,
200 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
201 const boost::multi_array<T, 4>& originalInput,
202 const boost::multi_array<T, 4>& originalKernel,
203 const boost::multi_array<B, 1>& bias,
204 const boost::multi_array<T, 4>& originalOutputExpected,
205 float qScale,
206 int32_t qOffset,
207 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
208 uint32_t padLeft = 0,
209 uint32_t padTop = 0,
210 uint32_t padRight = 0,
211 uint32_t padBottom = 0,
212 uint32_t strideX = 1,
213 uint32_t strideY = 1,
214 uint32_t dilationX = 1,
215 uint32_t dilationY = 1)
216{
217 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
218 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
219 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
220 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
221
222 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
223 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
224 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
225 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
226
227 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
228 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
229 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
230 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
231
232 bool biasEnabled = bias.size() > 0;
233
234 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
235 BOOST_ASSERT(inputNum == 1);
236 BOOST_ASSERT(outputNum == 1);
237
238 // If a bias is used, its size must equal the number of output channels.
239 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
240
241
242 // Note these tensors will use two (identical) batches.
243 armnn::TensorInfo inputTensorInfo =
244 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
245 armnn::TensorInfo outputTensorInfo =
246 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
247 armnn::TensorInfo kernelDesc =
248 armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
249 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
250
251 // Set quantization parameters if the requested type is a quantized type.
252 if(armnn::IsQuantizedType<T>())
253 {
254 inputTensorInfo.SetQuantizationScale(qScale);
255 inputTensorInfo.SetQuantizationOffset(qOffset);
256 outputTensorInfo.SetQuantizationScale(qScale);
257 outputTensorInfo.SetQuantizationOffset(qOffset);
258 kernelDesc.SetQuantizationScale(qScale);
259 kernelDesc.SetQuantizationOffset(qOffset);
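        // The bias scale must equal inputScale * weightScale (both qScale here) so the quantized bias
        // lies in the same fixed-point domain as the convolution accumulator.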
260 biasDesc.SetQuantizationScale(qScale*qScale);
261 biasDesc.SetQuantizationOffset(0);
262 }
263
264 LayerTestResult<T, 4> ret(outputTensorInfo);
265
266 // Construct input data - two batches of the same input image.
267 std::vector<T> inputImage;
268 inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
269 std::vector<T> inputData;
270 inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
271 inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
272
    // At this point, permute the input data if the NHWC layout was requested.
274 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
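    // Each entry is the destination dimension of the corresponding source dimension
    // (N->0, C->3, H->1, W->2), i.e. it rearranges NCHW-ordered data into NHWC.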
275 if (layout == armnn::DataLayout::NHWC)
276 {
277 std::vector<T> tmp(inputData.size());
278 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
279 inputData = tmp;
280 }
281
282 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
283
284 std::vector<T> outputImage;
285 outputImage.assign(originalOutputExpected.data(),
286 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
287
288 // Apply bias to output image if it is enabled.
289 if(biasEnabled)
290 {
291 std::vector<T> biasV;
292 biasV.assign(bias.data(), bias.data() + outputChannels);
293 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
294 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
295 outputWidth, outputHeight);
296 }
297
298 // Construct expected output data - two identical images.
299 std::vector<T> outputData;
300 outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
301 outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
302
    // At this point, permute the expected output if the NHWC layout was requested.
304 if (layout == armnn::DataLayout::NHWC)
305 {
306 std::vector<T> tmp(outputData.size());
307 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
308 outputData = tmp;
309 }
310 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
311
312 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
313 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
314
315 armnn::Convolution2dQueueDescriptor data;
316 armnn::WorkloadInfo info;
317 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
318 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
319 // Permute the kernel if necessary
320 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
321 if (layout == armnn::DataLayout::NHWC)
322 {
323 armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
324 }
325 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
326
327 if(biasEnabled)
328 {
329 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
330 }
331
332 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
333 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
334
335 data.m_Weight = &weightsTensor;
336 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
337 data.m_Parameters.m_StrideX = strideX;
338 data.m_Parameters.m_StrideY = strideY;
339 data.m_Parameters.m_PadLeft = padLeft;
340 data.m_Parameters.m_PadRight = padRight;
341 data.m_Parameters.m_PadTop = padTop;
342 data.m_Parameters.m_PadBottom = padBottom;
343 data.m_Parameters.m_BiasEnabled = biasEnabled;
344 data.m_Parameters.m_DataLayout = layout;
345 data.m_Parameters.m_DilationX = dilationX;
346 data.m_Parameters.m_DilationY = dilationY;
347
348 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
349 inputHandle->Allocate();
350 outputHandle->Allocate();
351
352 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
353
354 ExecuteWorkload(*workload, memoryManager);
355
356 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
357
358 return ret;
359}
360
361template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
362 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
363LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
364 armnn::IWorkloadFactory& workloadFactory,
365 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
366 const boost::multi_array<T, 4>& input,
367 const boost::multi_array<T, 4>& kernel,
368 const boost::multi_array<B, 1>& bias,
369 const boost::multi_array<T, 4>& outputExpected,
370 const armnn::DataLayout dataLayout,
371 float qScale,
372 int32_t qOffset,
373 uint32_t padLeft = 1,
374 uint32_t padTop = 1,
375 uint32_t padRight = 1,
376 uint32_t padBottom = 1,
377 uint32_t strideX = 1,
378 uint32_t strideY = 1)
379{
380 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
381 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
382 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
383 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
384
385 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
386 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
387 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
388 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
389
390 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
391 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
392 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
393 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
394
395 bool biasEnabled = bias.size() > 0;
396
397 // Creates the tensors.
398 armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
399 armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
400 ArmnnType);
401 armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
402 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
403
404 // Construct the input data.
405 std::vector<T> inputData;
406 inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
407 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
408
409 // Construct the output data, with bias applied, as appropriate.
410 std::vector<T> outputData;
411 outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
412
413 LayerTestResult<T, 4> ret(outputTensorInfo);
414 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
415
416 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
417 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
418
419 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
420 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
421
422 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
423
424 armnn::Convolution2dQueueDescriptor data;
425
426 data.m_Weight = &weightsTensor;
427 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
428 data.m_Parameters.m_StrideX = strideX;
429 data.m_Parameters.m_StrideY = strideY;
430 data.m_Parameters.m_PadLeft = padLeft;
431 data.m_Parameters.m_PadRight = padRight;
432 data.m_Parameters.m_PadTop = padTop;
433 data.m_Parameters.m_PadBottom = padBottom;
434 data.m_Parameters.m_BiasEnabled = biasEnabled;
435 data.m_Parameters.m_DataLayout = dataLayout;
436
437 armnn::WorkloadInfo info;
438 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
439 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
440
441 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
442 inputHandle->Allocate();
443 outputHandle->Allocate();
444
445 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
446
447 ExecuteWorkload(*workload, memoryManager);
448
449 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
450
451 return ret;
452}
453
454template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
455LayerTestResult<T,4> Convolution1dTestImpl(
456 armnn::IWorkloadFactory& workloadFactory,
457 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
458 float qScale,
459 int32_t qOffset,
460 bool biasEnabled)
461{
462 using B = armnn::ResolveType<ArmnnBType>;
463 // Until we have a specialist 1D convolution layer, we can fake one using
464 // 2D convolution with the final dimension set to 1.
465 // I don't anticipate this being particularly slow, given that convolution is implemented
466 // as a matrix multiplication, at which point dimension doesn't matter.
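    // Shapes map as: input [N, C, L] -> [N, C, L, 1], kernel [O, C, K] -> [O, C, K, 1],
    // with padding and stride applied along the length ('height') axis only.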
467
468 unsigned int batchSize = 1;
469 unsigned int inputChannels = 2;
470 unsigned int outputChannels = 3;
471 unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
472 unsigned int kernelSize = 3;
473 unsigned int padSize = 2;
474 unsigned int stride = 1;
    unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize) / stride + 1.
476
477 armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
478 armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
479 armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
480 armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
481
482 // Set quantization parameters if the requested type is a quantized type.
483 if(armnn::IsQuantizedType<T>())
484 {
485 inputInfo.SetQuantizationScale(qScale);
486 inputInfo.SetQuantizationOffset(qOffset);
487 outputInfo.SetQuantizationScale(qScale);
488 outputInfo.SetQuantizationOffset(qOffset);
489 kernelInfo.SetQuantizationScale(qScale);
490 kernelInfo.SetQuantizationOffset(qOffset);
491 biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
492 biasInfo.SetQuantizationOffset(0);
493 }
494
495 std::vector<T> inputData(
496 QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), {
497 5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
498 -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
499 }));
500
501 std::vector<T> kernelData(
502 QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), {
503 1.0f, 0.0f, 0.0f,
504 0.0f, 2.0f, -1.5f,
505
506 0.0f, 0.0f, 0.0f,
507 0.2f, 0.2f, 0.2f,
508
509 0.5f, 0.0f, 0.5f,
510 0.0f, -1.0f, 0.0f
511 }));
512
513 std::vector<B> biasData(
514 QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), {
515 1.0f, 0.0f, 0.0f
516 }));
517
518 std::vector<T> outputData(
519 QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), {
520 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
521 -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
522 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
523 }));
524
525 // Optionally apply bias to output image.
526 if(biasEnabled)
527 {
528 ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
529 biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
530 1, outputSize);
531 }
532
533 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
534 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
535
536 armnn::Convolution2dQueueDescriptor data;
537 armnn::WorkloadInfo info;
538 armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
539 armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
540
541 AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
542 AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
543
544 AddInputToWorkload(data, info, inputInfo, inputHandle.get());
545 AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
546
547 data.m_Weight = &weightsTensor;
548 data.m_Bias = &biasTensor;
549 data.m_Parameters.m_StrideX = 1;
550 data.m_Parameters.m_StrideY = stride;
551 data.m_Parameters.m_PadLeft = 0;
552 data.m_Parameters.m_PadRight = 0;
553 data.m_Parameters.m_PadTop = padSize;
554 data.m_Parameters.m_PadBottom = padSize;
555 data.m_Parameters.m_BiasEnabled = biasEnabled;
556
557 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
558 inputHandle->Allocate();
559 outputHandle->Allocate();
560
561 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
562
563 ExecuteWorkload(*workload, memoryManager);
564
565 // Output
566 LayerTestResult<T,4> ret(outputInfo);
567 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
568 ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
569 return ret;
570}
571
572template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
573LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
574 armnn::IWorkloadFactory& workloadFactory,
575 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
576 float qScale,
577 int32_t qOffset,
578 bool biasEnabled,
579 armnn::DataLayout dataLayout)
580{
    // Use a single-batch, single-channel 4x3 (WxH) input image, in NHWC layout.
582
583 armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
584 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
585 {
586 1, 5, 2, 3,
587 8, 7, 3, 6,
588 3, 3, 9, 1
589 });
590
591
    // Use a single 1-channel 3x3 kernel.
593 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
594 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
595 4, 5, 6,
596 0, 0, 0,
597 3, 2, 1
598 });
599
    // Expected output is a single-batch, single-channel 4x3 (WxH) image.
601 armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
602
603 const std::vector<float> outputData =
604 {
605 23, 41, 33, 21,
606 44, 65, 76, 52,
607 82, 85, 79, 42
608 };
609
610 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
611
612 return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
613 workloadFactory,
614 memoryManager,
615 input,
616 kernel,
617 boost::multi_array<T, 1>(),
618 expectedOutput,
619 dataLayout,
620 qScale,
621 qOffset);
622}
623
624template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
625LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
626 armnn::IWorkloadFactory& workloadFactory,
627 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
628 float qScale,
629 int32_t qOffset,
630 bool biasEnabled,
631 const armnn::DataLayout& dataLayout)
632{
633 // Input is a single-batch, 1 channel, 5x5 image.
634 armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
635 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
636 {
637 1, 5, 2, 3, 5,
638 8, 7, 3, 6, 3,
639 3, 3, 9, 1, 9,
640 4, 1, 8, 1, 3,
641 6, 8, 1, 9, 2
642 });
643
644 // Use a 3x3 kernel.
645 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
646 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
647 {
648 4, 5, 6,
649 0, 0, 0,
650 3, 2, 1
651 });
652
653 // Expected output is a single-batch, 1 channel, 3x3 image.
654 armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
655
656 const std::vector<T> outputData =
657 {
658 23, 33, 24,
659 91, 99, 48,
660 26, 50, 19
661 };
662
663 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
664
665 uint32_t padLeft = 1;
666 uint32_t padTop = 1;
667 uint32_t padRight = 1;
668 uint32_t padBottom = 1;
669 uint32_t strideX = 2;
670 uint32_t strideY = 2;
671
672 return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
673 workloadFactory,
674 memoryManager,
675 input,
676 kernel,
677 boost::multi_array<T, 1>(),
678 expectedOutput,
679 dataLayout,
680 qScale,
681 qOffset,
682 padLeft,
683 padTop,
684 padRight,
685 padBottom,
686 strideX,
687 strideY);
688}
689
690template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
691LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
692 armnn::IWorkloadFactory& workloadFactory,
693 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
694 float qScale,
695 int32_t qOffset,
696 bool biasEnabled,
697 const armnn::DataLayout layout)
698{
699 // Use common single-batch 3-channel 16x8 image.
700 armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
701 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
702
703 // Use a 2-element batch with 3-channel 3x5 kernels.
704 armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
705 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
706 QuantizedVector<T>(qScale, qOffset, {
707 1, 1, 1,
708 1, -1, 1,
709 1, 1, 1,
710 1, 1, 1,
711 1, 1, 1,
712
713 0, 0, 0,
714 0, 0, 0,
715 0, 0, 0,
716 0, 0, 0,
717 0, 0, 0,
718
719 2, 2, 2,
720 2, 2, 2,
721 2, 2, 2,
722 2, 2, 2,
723 2, 2, 2,
724
725
726 0, 0, 0,
727 0, 0, 0,
728 0, 0, 0,
729 0, 0, 0,
730 0, 0, 0,
731
732 1, 1, 1,
733 1, 1, 1,
734 1, 1, 1,
735 1, 1, 1,
736 1, 1, 1,
737
738 0, 0, 0,
739 0, 0, 0,
740 0, 0, 0,
741 0, 0, 0,
742 0, 0, 0
743 })));
744
    // Expected output is 1 batch of a 2-channel 14x4 image.
746 armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
747 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
748 QuantizedVector<T>(qScale, qOffset, {
749 -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
750 -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
751 -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
752 -23.5f, -23.5f, -23.5f,
753 -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
754 -23.5f, -23.5f, -23.5f,
755
756 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
757 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
758 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
759 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
760 })));
761
762 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
763 workloadFactory,
764 memoryManager,
765 input,
766 kernel,
767 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
768 expectedOutput,
769 qScale,
770 qOffset,
771 layout);
772}
773
774template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
775 typename T = armnn::ResolveType<ArmnnType>>
776LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
777 armnn::IWorkloadFactory& workloadFactory,
778 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
779 float qScale,
780 int32_t qOffset,
781 bool biasEnabled,
782 const armnn::DataLayout layout)
783{
784 // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
785
786 // Use common single-batch 3-channel 16x8 image.
787 armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
788 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));
789
790 // Use a 2-element batch of 3-channel 3x3 kernels.
791 armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
792 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
793 QuantizedVector<T>(qScale, qOffset, {
794 1, 1, 1,
795 1, -1, 1,
796 1, 1, 1,
797
798 0, 0, 0,
799 0, 0, 0,
800 0, 0, 0,
801
802 2, 2, 2,
803 2, 2, 2,
804 2, 2, 2,
805
806
807 0, 0, 0,
808 0, 0, 0,
809 0, 0, 0,
810
811 1, 1, 1,
812 1, 1, 1,
813 1, 1, 1,
814
815 0, 0, 0,
816 0, 0, 0,
817 0, 0, 0
818 })));
819
820 // Expected output is 1 batch of a 2-channel 14x6 image.
821 armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
822 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
823 QuantizedVector<T>(qScale, qOffset, {
824 -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
825 -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
826 -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
827 -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
828 -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
829 -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
830
831 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
832 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
833 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
834 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
835 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
836 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
837 })));
838
839 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
840 workloadFactory,
841 memoryManager,
842 input,
843 kernel,
844 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
845 expectedOutput,
846 qScale,
847 qOffset,
848 layout);
849}
850
851template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
852 typename T = armnn::ResolveType<ArmnnType>>
853LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
854 armnn::IWorkloadFactory& workloadFactory,
855 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
856 const armnn::DataLayout layout,
857 float qScale,
858 int32_t qOffset)
859{
860 // Use a single-batch 1-channel 3x3 image as input.
861 armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
862 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
863 QuantizedVector<T>(qScale, qOffset, {
864 11,21,31,
865 12,22,32,
866 13,23,33
867 })));
868
869 // Use 1 batch of a 1-channel 2x2 kernel.
870 armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
871 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
872 QuantizedVector<T>(qScale, qOffset, {
873 -11,-21,
874 -12,-22,
875 })));
876
877// Expected output is 1 batch of a 1-channel 6x8 image.
878// Manually calculated like this:
879//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
880//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
881//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
882//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
883//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
884//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
885//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
886 armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
887 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
888 QuantizedVector<T>(qScale, qOffset, {
889 0, 0, 0, 0, 0, 0,
890 -242, -594, -934, -372, 0, 0,
891 -495, -1190, -1850, -725, 0, 0,
892 -538, -1256, -1916, -748, 0, 0,
893 -273, -626, -946, -363, 0, 0,
894 0, 0, 0, 0, 0, 0,
895 0, 0, 0, 0, 0, 0,
896 0, 0, 0, 0, 0, 0
897 })));
898
899 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
900 workloadFactory,
901 memoryManager,
902 input,
903 kernel,
904 GetBias2<ArmnnBType>(false, qScale * qScale),
905 expectedOutput,
906 qScale,
907 qOffset,
908 layout,
909 1, // Padding left.
910 2, // Padding top.
911 3, // Padding right.
912 4); // Padding bottom.
913}
914
915template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
916 typename T = armnn::ResolveType<ArmnnType>>
917LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
918 armnn::IWorkloadFactory& workloadFactory,
919 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
920 const armnn::DataLayout layout,
921 float qScale,
922 int32_t qOffset)
923{
924 // Use a single-batch 1-channel 5x5 image as input.
925 armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
926 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
927 QuantizedVector<T>(qScale, qOffset, {
928 11,21,31,41,51,
929 12,22,32,42,52,
930 13,23,33,43,53,
931 14,24,34,44,54,
932 15,25,35,45,55,
933 })));
934
935 // Use 1 batch of a 1-channel 4x4 kernel.
936 armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
937 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
938 QuantizedVector<T>(qScale, qOffset, {
939 -11,-21,-31,-41,
940 -12,-22,-32,-42,
941 -13,-23,-33,-43,
942 -14,-24,-34,-44,
943 })));
944
945 // Expected output is 1 batch of a 1-channel 5x5 image.
946 armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
947 std::vector<T> myVec(outputDesc.GetNumElements(), 0);
948 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
949 QuantizedVector<T>(qScale, qOffset, {
950 -7140, -10580, -13940, -9300, -5230,
951 -9590, -14120, -18520, -12290, -6860,
952 -9980, -14560, -18960, -12560, -7000,
953 -7518, -10904, -14144, -9318, -5152,
954 -5032, -7256, -9376, -6142, -3368,
955 })));
956
957 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
958 workloadFactory,
959 memoryManager,
960 input,
961 kernel,
962 GetBias2<ArmnnBType>(false, qScale * qScale),
963 expectedOutput,
964 qScale,
965 qOffset,
966 layout,
967 1, // Padding left.
968 1, // Padding top.
969 2, // Padding right.
970 2); // Padding bottom.
971}
972
973template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
974LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
975 armnn::IWorkloadFactory& workloadFactory,
976 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
977 const std::vector<float>& inputNoQuantizedValues,
978 armnn::TensorInfo& inputTensorInfo,
979 const std::vector<float>& kernelNoQuantizedValues,
980 armnn::TensorInfo& kernelTensorInfo,
981 const std::vector<float>& outputExpectedNoQuantizedValues,
982 armnn::TensorInfo& outputTensorInfo,
983 uint32_t dilationX,
984 uint32_t dilationY,
985 armnn::DataLayout layout = armnn::DataLayout::NCHW,
986 uint32_t padLeft = 0,
987 uint32_t padTop = 0,
988 uint32_t padRight = 0,
989 uint32_t padBottom = 0,
990 uint32_t strideX = 1,
991 uint32_t strideY = 1,
992 bool biasEnabled = false
993)
994{
995 float qScale;
996 int32_t qOffset;
997 switch (ArmnnType)
998 {
999 case armnn::DataType::QuantisedAsymm8:
1000 {
1001 qScale = 0.1f;
1002 qOffset = 128;
1003 break;
1004 }
1005 case armnn::DataType::QuantisedSymm16:
1006 {
1007 qScale = 0.1f;
1008 qOffset = 0;
1009 break;
1010 }
1011 case armnn::DataType::Float32:
1012 default:
1013 {
1014 qScale = 0.f;
1015 qOffset = 0;
1016 break;
1017 }
1018 }
1019
1020 inputTensorInfo.SetQuantizationScale(qScale);
1021 inputTensorInfo.SetQuantizationOffset(qOffset);
1022 kernelTensorInfo.SetQuantizationScale(qScale);
1023 kernelTensorInfo.SetQuantizationOffset(qOffset);
1024 outputTensorInfo.SetQuantizationScale(qScale);
1025 outputTensorInfo.SetQuantizationOffset(qOffset);
1026
1027 auto input = MakeTensor<T, 4>(inputTensorInfo,
1028 std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
1029 inputTensorInfo.GetQuantizationOffset(),
1030 inputNoQuantizedValues)));
1031 auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
1032 std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
1033 kernelTensorInfo.GetQuantizationOffset(),
1034 kernelNoQuantizedValues)));
1035 auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
1036 std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
1037 outputTensorInfo.GetQuantizationOffset(),
1038 outputExpectedNoQuantizedValues)));
1039
1040 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1041 workloadFactory,
1042 memoryManager,
1043 input,
1044 kernel,
1045 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1046 expectedOutput,
1047 qScale,
1048 qOffset,
1049 layout,
1050 padLeft,
1051 padTop,
1052 padRight,
1053 padBottom,
1054 strideX,
1055 strideY,
1056 dilationX,
1057 dilationY);
1058}
1059
1060template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1061LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
1062 armnn::IWorkloadFactory& workloadFactory,
1063 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1064 bool biasEnabled,
1065 const armnn::DataLayout layout)
1066{
1067 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1068 std::vector<float> inputNoQuantizedValues =
1069 {
1070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1073 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1074 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1075 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1078 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1079 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1080 };
1081
1082 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1083 std::vector<float> kernelNoQuantizedValues =
1084 {
1085 1, 2, 3,
1086 4, 5, 6,
1087 7, 8, 9
1088 };
1089
    // With a dilation rate of 3 the effective kernel size is d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2*P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
1092 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1093 std::vector<float> outputExpectedNoQuantizedValues =
1094 {
1095 6., 5., 5., 5.,
1096 6., 5., 5., 5.,
1097 6., 5., 5., 5.,
1098 3., 2., 2., 2.
1099 };
1100
1101 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1102 workloadFactory,
1103 memoryManager,
1104 inputNoQuantizedValues,
1105 inputTensorInfo,
1106 kernelNoQuantizedValues,
1107 kernelTensorInfo,
1108 outputExpectedNoQuantizedValues,
1109 outputTensorInfo,
        3,
        3,
        layout,
        0, 0, 0, 0, // Padding left/top/right/bottom.
        1, 1,       // Stride X/Y.
        biasEnabled);
1114}
1115
1116template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1117LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
1118 armnn::IWorkloadFactory& workloadFactory,
1119 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1120 bool biasEnabled,
1121 const armnn::DataLayout layout)
1122{
1123 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1124 std::vector<float> inputNoQuantizedValues =
1125 {
1126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1127 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1129 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1130 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1131 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1132 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1133 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1134 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1135 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1136
1137 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1140 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1141 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1142 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1143 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1145 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1146 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1147 };
1148
1149 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1150 std::vector<float> kernelNoQuantizedValues =
1151 {
1152 1, 2, 3,
1153 4, 5, 6,
1154 7, 8, 9,
1155
1156 1, 2, 3,
1157 4, 5, 6,
1158 7, 8, 9
1159 };
1160
    // With a dilation rate of 3 the effective kernel size is d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2*P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
1163 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1164 std::vector<float> outputExpectedNoQuantizedValues =
1165 {
1166 12., 10., 10., 10.,
1167 12., 10., 10., 10.,
1168 12., 10., 10., 10.,
1169 6., 4., 4., 4.
1170 };
1171
1172 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1173 workloadFactory,
1174 memoryManager,
1175 inputNoQuantizedValues,
1176 inputTensorInfo,
1177 kernelNoQuantizedValues,
1178 kernelTensorInfo,
1179 outputExpectedNoQuantizedValues,
1180 outputTensorInfo,
        3,
        3,
        layout,
        0, 0, 0, 0, // Padding left/top/right/bottom.
        1, 1,       // Stride X/Y.
        biasEnabled);
1185}
1186
1187template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1188LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
1189 armnn::IWorkloadFactory &workloadFactory,
1190 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
1191 bool biasEnabled,
1192 const armnn::DataLayout layout)
1193{
1194 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1195 std::vector<float> inputNoQuantizedValues =
1196 {
1197 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1198 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1199 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1200 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1201 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1202 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1203 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1204 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1205 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1206 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1207 };
1208
1209 armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1210 std::vector<float> kernelNoQuantizedValues =
1211 {
1212 1, 2,
1213 3, 4
1214 };
1215
    // With a dilation rate of 2 the effective kernel size is d*(K-1)+1 = 2*(2-1)+1 = 3,
    // so the output is 4x4: trunc((I - K_eff + 2*P)/S) + 1 => trunc((10 - 3 + 2)/3) + 1 = 4,
    // where I = 10 (input), K_eff = 3 (dilated kernel), 2*P = 2 (1 pixel of padding each side), S = 3 (stride).
1219 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1220 std::vector<float> outputExpectedNoQuantizedValues =
1221 {
1222 4, 7, 7, 3,
1223 6, 10, 10, 4,
1224 6, 10, 10, 4,
1225 2, 3, 3, 1
1226 };
1227 uint32_t padLeft = 1;
1228 uint32_t padTop = 1;
1229 uint32_t padRight = 1;
1230 uint32_t padBottom = 1;
1231
1232 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1233 workloadFactory,
1234 memoryManager,
1235 inputNoQuantizedValues,
1236 inputTensorInfo,
1237 kernelNoQuantizedValues,
1238 kernelTensorInfo,
1239 outputExpectedNoQuantizedValues,
1240 outputTensorInfo,
1241 2,
1242 2,
1243 layout,
1244 padLeft,
1245 padTop,
1246 padRight,
1247 padBottom,
1248 3,
1249 3,
1250 biasEnabled
1251 );
1252}
1253
1254template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1255LayerTestResult<T,4> CompareConvolution2dTestImpl(
1256 armnn::IWorkloadFactory& workloadFactory,
1257 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1258 armnn::IWorkloadFactory& refWorkloadFactory)
1259{
1260 unsigned int inputHeight = 8;
1261 unsigned int inputWidth = 16;
1262 unsigned int inputChannels = 3;
1263 unsigned int inputNum = 5;
1264
1265 unsigned int kernelHeight = 3;
1266 unsigned int kernelWidth = 3;
1267
1268 unsigned int strideX = 2;
1269 unsigned int strideY = 3;
1270 unsigned int padX = 1;
1271 unsigned int padY = 1;
1272
1273 unsigned int outputNum = inputNum;
1274 unsigned int outputChannels = 2;
1275 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1276 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
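    // With the values above: outputHeight = (8 + 2 - 3 + 3) / 3 = 3, outputWidth = (16 + 2 - 3 + 2) / 2 = 8.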
1277
1278 armnn::TensorInfo inputTensorInfo;
1279 armnn::TensorInfo outputTensorInfo;
1280 armnn::TensorInfo kernelDesc;
1281 armnn::TensorInfo biasDesc;
1282
1283 unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1284 unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1285 unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1286 unsigned int biasShape[] = {outputChannels};
1287
1288 inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1289 outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1290 kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1291 biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1292
1293 LayerTestResult<T,4> ret(outputTensorInfo);
1294
1295 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1296 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1297 auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1298
1299 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1300 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1301
1302 armnn::Convolution2dQueueDescriptor data;
1303 armnn::WorkloadInfo info;
1304 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1305 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1306
1307 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1308 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1309
1310 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1311 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1312 data.m_Weight = &weightsTensor;
1313 data.m_Bias = &biasTensor;
1314 data.m_Parameters.m_StrideX = strideX;
1315 data.m_Parameters.m_StrideY = strideY;
1316 data.m_Parameters.m_PadLeft = padX;
1317 data.m_Parameters.m_PadRight = padX;
1318 data.m_Parameters.m_PadTop = padY;
1319 data.m_Parameters.m_PadBottom = padY;
1320 data.m_Parameters.m_BiasEnabled = true;
1321
1322 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1323 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1324
1325 armnn::Convolution2dQueueDescriptor refData = data;
1326 armnn::WorkloadInfo refInfo = info;
1327 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1328 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1329
1330 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1331 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1332
1333 outputHandleRef->Allocate();
1334 inputHandleRef->Allocate();
1335
1336 inputHandle->Allocate();
1337 outputHandle->Allocate();
1338
1339 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1340 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1341
1342 ExecuteWorkload(*workload, memoryManager);
1343
1344 workloadRef->PostAllocationConfigure();
1345 workloadRef->Execute();
1346
1347 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1348 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1349
1350 return ret;
1351}
1352
1353//
1354// DepthwiseConvolution2d implementations
1355//
1356
1357template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1358 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1359LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1360 armnn::IWorkloadFactory& workloadFactory,
1361 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1362 const boost::multi_array<T, 4>& input,
1363 const boost::multi_array<T, 4>& kernel,
1364 const boost::multi_array<B, 1>& bias,
1365 const boost::multi_array<T, 4>& outputExpected,
1366 float qScale,
1367 int32_t qOffset,
1368 const armnn::DataLayout layout,
1369 uint32_t padLeft = 0,
1370 uint32_t padTop = 0,
1371 uint32_t padRight = 0,
1372 uint32_t padBottom = 0,
1373 uint32_t strideX = 1,
1374 uint32_t strideY = 1)
1375{
1376 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1377 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1378 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1379 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1380 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1381 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1382 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1383 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1384 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1385 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1386 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1387 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1388
1389 // If a bias is used, its size must equal the number of output channels.
1390 bool biasEnabled = bias.size() > 0;
1391 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1392
1393 // Creates the tensors.
1394 armnn::TensorInfo inputTensorInfo =
1395 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1396 armnn::TensorInfo outputTensorInfo =
1397 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1398 armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
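    // The weights here are laid out [M, C, H, W] with the channel multiplier M first,
    // giving C * M output channels.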
1399 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1400
1401 // Set quantization parameters if the requested type is a quantized type.
1402 if (armnn::IsQuantizedType<T>())
1403 {
1404 inputTensorInfo.SetQuantizationScale(qScale);
1405 inputTensorInfo.SetQuantizationOffset(qOffset);
1406 outputTensorInfo.SetQuantizationScale(qScale);
1407 outputTensorInfo.SetQuantizationOffset(qOffset);
1408 kernelDesc.SetQuantizationScale(qScale);
1409 kernelDesc.SetQuantizationOffset(qOffset);
1410 biasDesc.SetQuantizationScale(qScale*qScale);
1411 biasDesc.SetQuantizationOffset(0);
1412 }
1413
1414 // Construct the input data.
1415 std::vector<T> inputData;
1416 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1417
    // At this point, permute the input data if the NHWC layout was requested.
1419 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1420 if (layout == armnn::DataLayout::NHWC)
1421 {
1422 std::vector<T> tmp(inputData.size());
1423 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1424 inputData = tmp;
1425 }
1426
1427 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1428
1429 // Construct the output data, with bias applied, as appropriate.
1430 std::vector<T> outputData;
1431 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1432 if (biasEnabled)
1433 {
1434 std::vector<T> biasV;
1435 biasV.assign(bias.data(), bias.data() + outputChannels);
1436 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1437 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1438 outputWidth, outputHeight);
1439 }
1440
1441 LayerTestResult<T, 4> ret(outputTensorInfo);
1442
    // At this point, permute the expected output if the NHWC layout was requested.
1444 if (layout == armnn::DataLayout::NHWC)
1445 {
1446 std::vector<T> tmp(outputData.size());
1447 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1448 outputData = tmp;
1449 }
1450
1451 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1452
1453 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1454 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1455
1456 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1457
1458 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1459
1460 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1461 if (biasEnabled)
1462 {
1463 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1464 }
1465
1466 armnn::DepthwiseConvolution2dQueueDescriptor data;
1467 data.m_Weight = &weightsTensor;
1468 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
1469 data.m_Parameters.m_StrideX = strideX;
1470 data.m_Parameters.m_StrideY = strideY;
1471 data.m_Parameters.m_PadLeft = padLeft;
1472 data.m_Parameters.m_PadRight = padRight;
1473 data.m_Parameters.m_PadTop = padTop;
1474 data.m_Parameters.m_PadBottom = padBottom;
1475 data.m_Parameters.m_BiasEnabled = biasEnabled;
1476 data.m_Parameters.m_DataLayout = layout;
1477
1478 armnn::WorkloadInfo info;
1479 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1480 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1481
1482 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1483 inputHandle->Allocate();
1484 outputHandle->Allocate();
1485
1486 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1487
1488 ExecuteWorkload(*workload, memoryManager);
1489
1490 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1491
1492 return ret;
1493}
1494
1495template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1496LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1497 armnn::IWorkloadFactory& workloadFactory,
1498 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1499 float qScale,
1500 int32_t qOffset,
1501 bool biasEnabled,
1502 const armnn::DataLayout layout)
1503{
1504 using B = armnn::ResolveType<ArmnnBType>;
1505
1506 unsigned int inputHeight = 3;
1507 unsigned int inputWidth = 3;
1508 unsigned int inputChannels = 2;
1509 unsigned int inputNum = 1;
1510
1511 unsigned int kernelHeight = 3;
1512 unsigned int kernelWidth = 3;
1513 unsigned int kernelChannels = inputChannels;
1514 unsigned int kernelDepthMultiplier = 1;
1515
1516 unsigned int outputHeight = 1;
1517 unsigned int outputWidth = 1;
1518 unsigned int outputChannels = kernelChannels;
1519 unsigned int outputNum = inputNum;
1520
1521 armnn::TensorInfo inputTensorInfo =
1522 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1523 armnn::TensorInfo outputTensorInfo =
1524 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1525 armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1526 ArmnnType);
1527 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1528
1529 // Set quantization parameters if the requested type is a quantized type.
1530 if(armnn::IsQuantizedType<T>())
1531 {
1532 inputTensorInfo.SetQuantizationScale(qScale);
1533 inputTensorInfo.SetQuantizationOffset(qOffset);
1534 outputTensorInfo.SetQuantizationScale(qScale);
1535 outputTensorInfo.SetQuantizationOffset(qOffset);
1536 kernelDesc.SetQuantizationScale(qScale);
1537 kernelDesc.SetQuantizationOffset(qOffset);
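        // By the usual quantization convention the bias scale is inputScale * weightScale
        // (both equal to qScale here) and the bias offset is zero.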
1538 biasDesc.SetQuantizationScale(qScale*qScale);
1539 biasDesc.SetQuantizationOffset(0);
1540 }
1541 std::vector<T> inputData = std::vector<T>(
1542 QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
1543 1.f, 2.f, 1.f,
1544 2.f, 1.f, 2.f,
1545 1.f, 2.f, 1.f,
1546
1547 1.f, 2.f, 1.f,
1548 2.f, 1.f, 2.f,
1549 1.f, 2.f, 1.f,
1550 }));
1551    // If the layout requires it, permute the input data to NHWC at this point.
1552 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
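    // The mapping { 0, 3, 1, 2 } sends each source dimension to its destination index
    // (N->0, C->3, H->1, W->2), i.e. it rearranges NCHW data into NHWC order.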
1553 if (layout == armnn::DataLayout::NHWC)
1554 {
1555 std::vector<T> tmp(inputData.size());
1556 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1557 inputData = tmp;
1558 }
1559 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1560
1561 std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1562 {0, 2}));
1563 auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1564
1565 std::vector<T> kernelData = std::vector<T>(
1566 QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
1567 1.f, 0.f, 1.f,
1568 0.f, 0.f, 0.f,
1569 -1.f, 0.f, -1.f,
1570
1571 1.f, 0.f, 1.f,
1572 0.f, 0.f, 0.f,
1573 -1.f, 0.f, -1.f,
1574 }));
1575 auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1576
1577 // Manually calculated.
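    // For each channel the single valid kernel position gives 1*1 + 1*1 + (-1)*1 + (-1)*1 = 0
    // (the middle kernel row is all zeros), hence the expected {0, 0}.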
1578 std::vector<T> outputImage(
1579 QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
1580 outputTensorInfo.GetQuantizationOffset(),
1581 {0.f, 0.f})
1582 );
1583
1584 // Optionally apply bias to output image.
1585 if(biasEnabled)
1586 {
1587 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1588 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1589 outputWidth, outputHeight);
1590 }
1591
1592 LayerTestResult<T, 4> ret(outputTensorInfo);
1593 if (layout == armnn::DataLayout::NHWC)
1594 {
1595 std::vector<T> tmp(outputImage.size());
1596 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1597 outputImage = tmp;
1598 }
1599
1600 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1601
1602 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1603 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1604
1605 armnn::DepthwiseConvolution2dQueueDescriptor data;
1606 armnn::WorkloadInfo info;
1607 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1608 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1609
1610 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1611 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1612
1613 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1614 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1615
1616 data.m_Weight = &weightsTensor;
1617 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1618 data.m_Parameters.m_StrideX = 1;
1619 data.m_Parameters.m_StrideY = 1;
1620 data.m_Parameters.m_PadLeft = 0;
1621 data.m_Parameters.m_PadRight = 0;
1622 data.m_Parameters.m_PadTop = 0;
1623 data.m_Parameters.m_PadBottom = 0;
1624 data.m_Parameters.m_BiasEnabled = biasEnabled;
1625 data.m_Parameters.m_DataLayout = layout;
1626
1627 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1628 inputHandle->Allocate();
1629 outputHandle->Allocate();
1630
1631 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1632
1633 ExecuteWorkload(*workload, memoryManager);
1634
1635 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1636
1637 return ret;
1638}
1639
1640template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1641LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1642 armnn::IWorkloadFactory& workloadFactory,
1643 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1644 float qScale,
1645 int32_t qOffset,
1646 bool biasEnabled,
1647 const armnn::DataLayout layout)
1648{
1649 using B = armnn::ResolveType<ArmnnBType>;
1650
1651 unsigned int depthMultiplier = 2;
1652
1653 unsigned int inputHeight = 8;
1654 unsigned int inputWidth = 16;
1655 unsigned int inputChannels = 2;
1656 unsigned int inputBatchSize = 1;
1657
1658 unsigned int kernelHeight = 5;
1659 unsigned int kernelWidth = 3;
1660
1661 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1662 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
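    // With padTop = padBottom = 1 and strideY = 1 (set on the descriptor below):
    //     outputHeight = 8 - 5 + 1 + 2 = 6
    // With no horizontal padding and strideX = 2:
    //     outputWidth  = (16 - 3 + 1) / 2 = 7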
1663 unsigned int outputChannels = inputChannels * depthMultiplier;
1664 unsigned int outputBatchSize = inputBatchSize;
1665
1666 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1667 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1668 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1669 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1670 armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1671 ArmnnType);
1672 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1673
1674 // Set quantization parameters if the requested type is a quantized type.
1675 if(armnn::IsQuantizedType<T>())
1676 {
1677 inputTensorInfo.SetQuantizationScale(qScale);
1678 inputTensorInfo.SetQuantizationOffset(qOffset);
1679 outputTensorInfo.SetQuantizationScale(qScale);
1680 outputTensorInfo.SetQuantizationOffset(qOffset);
1681 kernelDesc.SetQuantizationScale(qScale);
1682 kernelDesc.SetQuantizationOffset(qOffset);
1683 biasDesc.SetQuantizationScale(qScale*qScale);
1684 biasDesc.SetQuantizationOffset(0);
1685 }
1686
1687 // NOTE: originalInputData is in NCHW format
1688 std::vector<T> originalInputData = std::vector<T>(
1689 QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
1690 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1691 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1692 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1693 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1694 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1695 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1696 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1697 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1698 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1699 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1700 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1701 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1702 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1703 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1704 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1705 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1706 }));
1707 std::vector<T> inputData = originalInputData;
1708    // If the layout requires it, permute the input data to NHWC at this point.
1709 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1710 if (layout == armnn::DataLayout::NHWC)
1711 {
1712 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1713 originalInputData.data(), inputData.data(), sizeof(T));
1714 }
1715 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1716
1717 std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1718 {0, 2, 1, -1}));
1719 auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1720
1721 std::vector<T> kernelData = std::vector<T>(
1722 QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
1723 1, 1, 1,
1724 1, -1, 1,
1725 1, 1, 1,
1726 1, 1, 1,
1727 1, 1, 1,
1728
1729 2, 2, 2,
1730 2, 2, 2,
1731 2, 2, 2,
1732 2, 2, 2,
1733 2, 2, 2,
1734
1735 0, 0, 0,
1736 0, -1, 0,
1737 0, 0, 0,
1738 0, 0, 0,
1739 0, 0, 0,
1740
1741 0, 0, 0,
1742 0, 0, 0,
1743 0, 1, 0,
1744 0, 0, 0,
1745 0, 0, 0
1746
1747 }));
1748 auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1749
1750 // Manually calculated.
1751 std::vector<T> originalOutputImage = std::vector<T>(
1752 QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
1753 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1754 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1755 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1756 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1757 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1758 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1759
1760 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1761 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1762 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1763 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1764 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1765 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1766
1767 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1768 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1769 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1770 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1771 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1772 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1773
1774 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1775 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1776 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1777 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1778 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1779 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1780 }));
1781
1782 // Optionally apply bias to output image.
1783 if(biasEnabled)
1784 {
1785 ApplyBias(originalOutputImage,
1786 outputTensorInfo.GetQuantizationScale(),
1787 outputTensorInfo.GetQuantizationOffset(),
1788 biasV,
1789 biasDesc.GetQuantizationScale(),
1790 biasDesc.GetQuantizationOffset(),
1791 outputWidth,
1792 outputHeight);
1793 }
1794
1795 LayerTestResult<T, 4> ret(outputTensorInfo);
1796 std::vector<T> outputImage = originalOutputImage;
1797 if (layout == armnn::DataLayout::NHWC)
1798 {
1799 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1800 originalOutputImage.data(), outputImage.data(), sizeof(T));
1801 }
1802
1803 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1804
1805 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1806 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1807
1808 armnn::DepthwiseConvolution2dQueueDescriptor data;
1809 armnn::WorkloadInfo info;
1810 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1811 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1812
1813 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1814 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1815
1816 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1817 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1818
1819 data.m_Weight = &weightsTensor;
1820 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1821 data.m_Parameters.m_StrideX = 2;
1822 data.m_Parameters.m_StrideY = 1;
1823 data.m_Parameters.m_PadLeft = 0;
1824 data.m_Parameters.m_PadRight = 0;
1825 data.m_Parameters.m_PadTop = 1;
1826 data.m_Parameters.m_PadBottom = 1;
1827 data.m_Parameters.m_BiasEnabled = biasEnabled;
1828 data.m_Parameters.m_DataLayout = layout;
1829
1830 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1831 inputHandle->Allocate();
1832 outputHandle->Allocate();
1833
1834 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1835
1836 ExecuteWorkload(*workload, memoryManager);
1837
1838 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1839
1840 return ret;
1841}
1842
1843template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1844 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1845LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1846 armnn::IWorkloadFactory& workloadFactory,
1847 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1848 const boost::multi_array<T, 4>& originalInput,
1849 const boost::multi_array<T, 4>& originalKernel,
1850 const boost::multi_array<B, 1>& bias,
1851 const boost::multi_array<T, 4>& originalOutputExpected,
1852 float qScale,
1853 int32_t qOffset,
1854 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1855 uint32_t padLeft = 0,
1856 uint32_t padTop = 0,
1857 uint32_t padRight = 0,
1858 uint32_t padBottom = 0,
1859 uint32_t strideX = 1,
1860 uint32_t strideY = 1,
1861 uint32_t dilationX = 1,
1862 uint32_t dilationY = 1)
1863{
1864 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1865 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1866 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1867 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1868
1869 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1870 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1871 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1872 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1873
1874 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1875 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1876 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1877 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1878
1879 bool biasEnabled = bias.size() > 0;
1880
1881 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1882 BOOST_ASSERT(inputNum == 1);
1883 BOOST_ASSERT(outputNum == 1);
1884
1885 // If a bias is used, its size must equal the number of output channels.
1886 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1887
1888
1889 // Note these tensors will use two (identical) batches.
1890 armnn::TensorInfo inputTensorInfo =
1891 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1892 armnn::TensorInfo outputTensorInfo =
1893 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1894
1895    // For depthwise convolution the kernel is always in NCHW layout, regardless of the layout of the input and output.
1896 armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1897
1898 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1899
1900 // Set quantization parameters if the requested type is a quantized type.
1901 if(armnn::IsQuantizedType<T>())
1902 {
1903 inputTensorInfo.SetQuantizationScale(qScale);
1904 inputTensorInfo.SetQuantizationOffset(qOffset);
1905 outputTensorInfo.SetQuantizationScale(qScale);
1906 outputTensorInfo.SetQuantizationOffset(qOffset);
1907 kernelDesc.SetQuantizationScale(qScale);
1908 kernelDesc.SetQuantizationOffset(qOffset);
1909 biasDesc.SetQuantizationScale(qScale*qScale);
1910 biasDesc.SetQuantizationOffset(0);
1911 }
1912
1913 LayerTestResult<T, 4> ret(outputTensorInfo);
1914
1915 // Construct input data
1916 std::vector<T> input;
1917 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1918 std::vector<T> inputData;
1919 inputData.insert(inputData.end(), input.begin(), input.end());
1920 inputData.insert(inputData.end(), input.begin(), input.end());
1921
1922    // If the layout requires it, permute the input data to NHWC at this point.
1923 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1924 if (layout == armnn::DataLayout::NHWC)
1925 {
1926 std::vector<T> tmp(inputData.size());
1927 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1928 inputData = tmp;
1929 }
1930
1931 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1932
1933 std::vector<T> output;
1934 output.assign(originalOutputExpected.data(),
1935 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1936
1937 // Apply bias to output data if it is enabled.
1938 if(biasEnabled)
1939 {
1940 std::vector<T> biasV;
1941 biasV.assign(bias.data(), bias.data() + outputChannels);
1942 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1943 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1944 outputWidth, outputHeight);
1945 }
1946
1947 // Construct expected output data
1948 std::vector<T> outputData;
1949 outputData.insert(outputData.end(), output.begin(), output.end());
1950 outputData.insert(outputData.end(), output.begin(), output.end());
1951
1952    // If the layout requires it, permute the expected output to NHWC at this point.
1953 if (layout == armnn::DataLayout::NHWC)
1954 {
1955 std::vector<T> tmp(outputData.size());
1956 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1957 outputData = tmp;
1958 }
1959 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1960
1961 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1962 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1963
1964 armnn::DepthwiseConvolution2dQueueDescriptor data;
1965 armnn::WorkloadInfo info;
1966 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1967 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1968
1969 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
1970 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1971
1972 if(biasEnabled)
1973 {
1974 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1975 }
1976
1977 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1978 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1979
1980 data.m_Weight = &weightsTensor;
1981    data.m_Bias = &biasTensor; // Set this whether or not bias is enabled; leaving it unset has been a source of bugs.
1982 data.m_Parameters.m_StrideX = strideX;
1983 data.m_Parameters.m_StrideY = strideY;
1984 data.m_Parameters.m_PadLeft = padLeft;
1985 data.m_Parameters.m_PadRight = padRight;
1986 data.m_Parameters.m_PadTop = padTop;
1987 data.m_Parameters.m_PadBottom = padBottom;
1988 data.m_Parameters.m_BiasEnabled = biasEnabled;
1989 data.m_Parameters.m_DataLayout = layout;
1990 data.m_Parameters.m_DilationX = dilationX;
1991 data.m_Parameters.m_DilationY = dilationY;
1992
1993 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1994 inputHandle->Allocate();
1995 outputHandle->Allocate();
1996
1997 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1998
1999 ExecuteWorkload(*workload, memoryManager);
2000
2001 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2002
2003 return ret;
2004}
2005
2006template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2007 typename T = armnn::ResolveType<ArmnnType>>
2008LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2009 armnn::IWorkloadFactory& workloadFactory,
2010 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2011 float qScale,
2012 int32_t qOffset,
2013 bool biasEnabled,
2014 const armnn::DataLayout layout)
2015{
2016 // Use a single-batch 2-channel 5x5 image as input.
2017 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2018 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2019 QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
2020 {
2021 0, 1, 2, 3, 4,
2022 5, 6, 7, 8, 9,
2023 10, 11, 12, 13, 14,
2024 15, 16, 17, 18, 19,
2025 20, 21, 22, 23, 24,
2026
2027 25, 26, 27, 28, 29,
2028 30, 31, 32, 33, 34,
2029 35, 36, 37, 38, 39,
2030 40, 41, 42, 43, 44,
2031 45, 46, 47, 48, 49
2032 })));
2033
2034 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2035 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2036 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2037 QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
2038 {
2039 32, 31, 30, 29,
2040 28, 27, 26, 25,
2041 24, 23, 22, 21,
2042 20, 19, 18, 17,
2043
2044 16, 15, 14, 13,
2045 12, 11, 10, 9,
2046 8, 7, 6, 5,
2047 4, 3, 2, 1
2048 })));
2049
2050 // Expected output is 1 batch of a 2-channel 5x5 image.
2051    // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2052 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2053 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2054 QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2055 {
2056 1062, 1580, 1850, 1530, 1117,
2057 2140, 3108, 3500, 2842, 2042,
2058 3580, 5068, 5460, 4342, 3062,
2059 3618, 5072, 5390, 4248, 2971,
2060 3074, 4282, 4510, 3533, 2457,
2061
2062 1550, 2284, 2362, 1955, 1428,
2063 2910, 4206, 4342, 3528, 2536,
2064 3390, 4886, 5022, 4068, 2916,
2065 3566, 5056, 5182, 4133, 2922,
2066 3100, 4352, 4452, 3517, 2465
2067 })));
2068
2069 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2070 workloadFactory,
2071 memoryManager,
2072 input,
2073 kernel,
2074 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2075 expectedOutput,
2076 qScale,
2077 qOffset,
2078 layout,
2079 1, // Padding left.
2080 1, // Padding top.
2081 2, // Padding right.
2082 2, // Padding bottom.
2083 1, // strideX
2084 1); // strideY
2085}
2086
2087template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2088 typename T = armnn::ResolveType<ArmnnType>>
2089LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2090 armnn::IWorkloadFactory& workloadFactory,
2091 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2092 float qScale,
2093 int32_t qOffset,
2094 bool biasEnabled)
2095{
2096 auto layout = armnn::DataLayout::NHWC;
2097
2098 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2099 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2100 QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
2101 {
2102 0, 1, 2, 3, 4,
2103 5, 6, 7, 8, 9,
2104 10, 11, 12, 13, 14,
2105 15, 16, 17, 18, 19,
2106 20, 21, 22, 23, 24,
2107
2108 25, 26, 27, 28, 29,
2109 30, 31, 32, 33, 34,
2110 35, 36, 37, 38, 39,
2111 40, 41, 42, 43, 44,
2112 45, 46, 47, 48, 49
2113 })));
2114
2115 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2116 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2117 QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
2118 {
2119 32, 31, 30, 29,
2120 28, 27, 26, 25,
2121 24, 23, 22, 21,
2122 20, 19, 18, 17,
2123
2124 16, 15, 14, 13,
2125 12, 11, 10, 9,
2126 8, 7, 6, 5,
2127 4, 3, 2, 1
2128 })));
2129
2130 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2131 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2132 QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2133 {
2134 1062, 1580, 1850, 1530, 1117,
2135 2140, 3108, 3500, 2842, 2042,
2136 3580, 5068, 5460, 4342, 3062,
2137 3618, 5072, 5390, 4248, 2971,
2138 3074, 4282, 4510, 3533, 2457,
2139
2140 1550, 2284, 2362, 1955, 1428,
2141 2910, 4206, 4342, 3528, 2536,
2142 3390, 4886, 5022, 4068, 2916,
2143 3566, 5056, 5182, 4133, 2922,
2144 3100, 4352, 4452, 3517, 2465
2145 })));
2146
2147 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2148 workloadFactory,
2149 memoryManager,
2150 input,
2151 kernel,
2152 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2153 expectedOutput,
2154 qScale,
2155 qOffset,
2156 layout,
2157 1, // Padding left.
2158 1, // Padding top.
2159 2, // Padding right.
2160 2, // Padding bottom.
2161 1, // strideX
2162 1); // strideY
2163}
2164
2165template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2166 typename T = armnn::ResolveType<ArmnnType>>
2167LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2168 armnn::IWorkloadFactory& workloadFactory,
2169 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2170 float qScale,
2171 int32_t qOffset,
2172 bool biasEnabled)
2173{
2174 auto layout = armnn::DataLayout::NHWC;
2175
2176 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2177 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2178 QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
2179 {
2180 0, 0, 0, 0, 0, 0, 0, 0, 0,
2181 0, 0, 0, 0, 0, 0, 0, 0, 0,
2182 0, 0, 0, 0, 0, 0, 0, 0, 0,
2183 0, 0, 0, 1, 1, 1, 0, 0, 0,
2184 0, 0, 0, 1, 1, 1, 0, 0, 0,
2185 0, 0, 0, 1, 1, 1, 0, 0, 0,
2186 0, 0, 0, 0, 0, 0, 0, 0, 0,
2187 0, 0, 0, 0, 0, 0, 0, 0, 0,
2188 0, 0, 0, 0, 0, 0, 0, 0, 0
2189 })));
2190
2191 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2192 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2193 QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
2194 {
2195 1, 2, 3,
2196 4, 5, 6,
2197 7, 8, 9
2198 })));
2199
2200 uint32_t padLeft = 0;
2201 uint32_t padTop = 0;
2202 uint32_t padRight = 0;
2203 uint32_t padBottom = 0;
2204 uint32_t strideX = 1;
2205 uint32_t strideY = 1;
2206 uint32_t dilationX = 3;
2207 uint32_t dilationY = 3;
2208
2209    // With a dilation rate of 3, the 3x3 kernel applied to this 9x9 input produces a 3x3 output of all 5s.
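    // The effective kernel extent with dilation d is d * (K - 1) + 1 = 3 * (3 - 1) + 1 = 7,
    // so the output size is (9 - 7) / 1 + 1 = 3.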
2210 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2211 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2212 QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2213 {
2214 5, 5, 5,
2215 5, 5, 5,
2216 5, 5, 5
2217 })));
2218
2219 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2220 workloadFactory,
2221 memoryManager,
2222 input,
2223 kernel,
2224 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2225 expectedOutput,
2226 qScale,
2227 qOffset,
2228 layout,
2229 padLeft,
2230 padTop,
2231 padRight,
2232 padBottom,
2233 strideX,
2234 strideY,
2235 dilationX,
2236 dilationY);
2237}
2238
2239template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2240LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2241 armnn::IWorkloadFactory& workloadFactory,
2242 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2243 const std::vector<float>& inputNoQuantizedValues,
2244 armnn::TensorInfo& inputTensorInfo,
2245 const std::vector<float>& kernelNoQuantizedValues,
2246 armnn::TensorInfo& kernelTensorInfo,
2247 const std::vector<float>& outputExpectedNoQuantizedValues,
2248 armnn::TensorInfo& outputTensorInfo,
2249 uint32_t dilationX,
2250 uint32_t dilationY,
2251 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2252 bool biasEnabled = false)
2253{
2254 float qScale;
2255 int32_t qOffset;
2256 switch (ArmnnType)
2257 {
2258 case armnn::DataType::QuantisedAsymm8:
2259 {
2260 qScale = 0.1f;
2261 qOffset = 128;
2262 break;
2263 }
2264 case armnn::DataType::QuantisedSymm16:
2265 {
2266 qScale = 0.1f;
2267 qOffset = 0;
2268 break;
2269 }
2270 case armnn::DataType::Float32:
2271 default:
2272 {
2273 qScale = 0.f;
2274 qOffset = 0;
2275 break;
2276 }
2277 }
2278
2279 inputTensorInfo.SetQuantizationScale(qScale);
2280 inputTensorInfo.SetQuantizationOffset(qOffset);
2281 kernelTensorInfo.SetQuantizationScale(qScale);
2282 kernelTensorInfo.SetQuantizationOffset(qOffset);
2283 outputTensorInfo.SetQuantizationScale(qScale);
2284 outputTensorInfo.SetQuantizationOffset(qOffset);
2285
2286 auto input = MakeTensor<T, 4>(inputTensorInfo,
2287 std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
2288 inputTensorInfo.GetQuantizationOffset(),
2289 inputNoQuantizedValues)));
2290 auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2291 std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
2292 kernelTensorInfo.GetQuantizationOffset(),
2293 kernelNoQuantizedValues)));
2294 auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
2295 std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
2296 outputTensorInfo.GetQuantizationOffset(),
2297 outputExpectedNoQuantizedValues)));
2298
2299 uint32_t padLeft = 0;
2300 uint32_t padTop = 0;
2301 uint32_t padRight = 0;
2302 uint32_t padBottom = 0;
2303 uint32_t strideX = 1;
2304 uint32_t strideY = 1;
2305
2306 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2307 workloadFactory,
2308 memoryManager,
2309 input,
2310 kernel,
2311 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2312 expectedOutput,
2313 qScale,
2314 qOffset,
2315 layout,
2316 padLeft,
2317 padTop,
2318 padRight,
2319 padBottom,
2320 strideX,
2321 strideY,
2322 dilationX,
2323 dilationY);
2324}
2325
2326template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2327LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2328 armnn::IWorkloadFactory& workloadFactory,
2329 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2330 bool biasEnabled,
2331 const armnn::DataLayout layout)
2332{
2333 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2334 std::vector<float> inputNoQuantizedValues =
2335 {
2336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2339 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2340 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2341 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2346 };
2347
2348 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2349 std::vector<float> kernelNoQuantizedValues =
2350 {
2351 1, 2, 3,
2352 4, 5, 6,
2353 7, 8, 9
2354 };
2355
2356    // With a dilation rate of 3 the effective kernel size is 7x7,
2357    // so the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2358 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2359 std::vector<float> outputExpectedNoQuantizedValues =
2360 {
2361 6., 5., 5., 5.,
2362 6., 5., 5., 5.,
2363 6., 5., 5., 5.,
2364 3., 2., 2., 2.
2365 };
2366
2367 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2368 workloadFactory,
2369 memoryManager,
2370 inputNoQuantizedValues,
2371 inputTensorInfo,
2372 kernelNoQuantizedValues,
2373 kernelTensorInfo,
2374 outputExpectedNoQuantizedValues,
2375 outputTensorInfo,
2376 3,
2377 3,
2378 layout,
2379 biasEnabled);
2380}
2381
2382template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2383LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2384 armnn::IWorkloadFactory& workloadFactory,
2385 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2386 bool biasEnabled,
2387 const armnn::DataLayout layout)
2388{
2389 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2390 std::vector<float> inputNoQuantizedValues =
2391 {
2392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2395 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2396 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2397 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2399 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2400 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2401 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2402
2403 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2406 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2407 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2408 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2409 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2413 };
2414
2415 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2416 std::vector<float> kernelNoQuantizedValues =
2417 {
2418 1, 2, 3,
2419 4, 5, 6,
2420 7, 8, 9,
2421
2422 1, 2, 3,
2423 4, 5, 6,
2424 7, 8, 9
2425 };
2426
2427    // With a dilation rate of 3 the effective kernel size is 7x7,
2428    // so the output will be 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2429 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2430 std::vector<float> outputExpectedNoQuantizedValues =
2431 {
2432 6., 5., 5., 5.,
2433 6., 5., 5., 5.,
2434 6., 5., 5., 5.,
2435 3., 2., 2., 2.,
2436
2437 6., 5., 5., 5.,
2438 6., 5., 5., 5.,
2439 6., 5., 5., 5.,
2440 3., 2., 2., 2.
2441 };
2442
2443 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2444 workloadFactory,
2445 memoryManager,
2446 inputNoQuantizedValues,
2447 inputTensorInfo,
2448 kernelNoQuantizedValues,
2449 kernelTensorInfo,
2450 outputExpectedNoQuantizedValues,
2451 outputTensorInfo,
2452 3,
2453 3,
2454 layout,
2455 biasEnabled);
2456}
2457
2458template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2459LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2460 armnn::IWorkloadFactory& workloadFactory,
2461 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2462 bool biasEnabled,
2463 const armnn::DataLayout layout)
2464{
2465 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2466 std::vector<float> inputNoQuantizedValues =
2467 {
2468 10.0, 10.0, 10.0,
2469 10.0, 10.0, 10.0,
2470 10.0, 10.0, 10.0,
2471
2472 21.0, 22.0, 23.0,
2473 24.0, 25.0, 26.0,
2474 27.0, 28.0, 29.0
2475 };
2476
2477 armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2478
2479 std::vector<float> kernelNoQuantizedValues =
2480 {
2481 0.25f, 0.25f,
2482 0.25f, 0.25f,
2483
2484 0.25f, 0.25f,
2485 0.25f, 0.25f,
2486
2487 0.0f , 0.0f,
2488 0.0f , 0.1f,
2489
2490 0.0f , 0.0f,
2491 0.0f , 0.1f,
2492
2493 0.2f , 0.0f,
2494 0.0f , 0.0f,
2495
2496 0.2f , 0.0f,
2497 0.0f , 0.0f,
2498
2499 0.0f , 0.3f,
2500 0.0f , 0.0f,
2501
2502 0.0f , 0.3f,
2503 0.0f , 0.0f
2504 };
2505
2506 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2507 std::vector<float> outputExpectedNoQuantizedValues =
2508 {
2509 10.f, 10.f,
2510 10.f, 10.f,
2511
2512 1.f, 1.f,
2513 1.f, 1.f,
2514
2515 2.f, 2.f,
2516 2.f, 2.f,
2517
2518 3.f, 3.f,
2519 3.f, 3.f,
2520
2521 23.f, 24.f,
2522 26.f, 27.f,
2523
2524 2.5f, 2.6000001f,
2525 2.8f, 2.9f,
2526
2527 4.2000003f, 4.4f,
2528 4.8f, 5.f,
2529
2530 6.6000004f, 6.9f,
2531 7.5000005f, 7.8f
2532 };
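    // Note the channel ordering in the expected values above: the four depth-multiplier outputs
    // of input channel 0 come first (10, 1, 2, 3), followed by the four outputs of input channel 1.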
2533
2534
2535 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2536 workloadFactory,
2537 memoryManager,
2538 inputNoQuantizedValues,
2539 inputTensorInfo,
2540 kernelNoQuantizedValues,
2541 kernelTensorInfo,
2542 outputExpectedNoQuantizedValues,
2543 outputTensorInfo,
2544 1,
2545 1,
2546 layout,
2547 biasEnabled);
2548}
2549
2550template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2551LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2552 armnn::IWorkloadFactory& workloadFactory,
2553 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2554 bool biasEnabled,
2555 const armnn::DataLayout layout)
2556{
2557 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2558 std::vector<float> inputNoQuantizedValues =
2559 {
2560 10.0, 10.0, 10.0,
2561 10.0, 10.0, 10.0,
2562 10.0, 10.0, 10.0,
2563
2564 21.0, 22.0, 23.0,
2565 24.0, 25.0, 26.0,
2566 27.0, 28.0, 29.0
2567 };
2568
2569 armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2570
2571 std::vector<float> kernelNoQuantizedValues =
2572 {
2573 0.25f, 0.25f,
2574 0.25f, 0.25f,
2575
2576 0.2f , 0.0f,
2577 0.0f , 0.0f,
2578
2579 0.0f , 0.0f,
2580 0.0f , 0.1f,
2581
2582 0.0f , 0.3f,
2583 0.0f , 0.0f
2584
2585 };
2586
2587 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2588 std::vector<float> outputExpectedNoQuantizedValues =
2589 {
2590 10.f, 10.f,
2591 10.f, 10.f,
2592
2593 1.f, 1.f,
2594 1.f, 1.f,
2595
2596 4.2000003f, 4.4f,
2597 4.8f, 5.f,
2598
2599 6.6000004f, 6.9f,
2600 7.5000005f, 7.8f
2601 };
2602
2603
2604 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2605 workloadFactory,
2606 memoryManager,
2607 inputNoQuantizedValues,
2608 inputTensorInfo,
2609 kernelNoQuantizedValues,
2610 kernelTensorInfo,
2611 outputExpectedNoQuantizedValues,
2612 outputTensorInfo,
2613 1,
2614 1,
2615 layout,
2616 biasEnabled);
2617}
2618
2619template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2620LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2621 armnn::IWorkloadFactory& workloadFactory,
2622 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2623 armnn::IWorkloadFactory& refWorkloadFactory,
2624 const armnnUtils::DataLayoutIndexed& layout)
2625{
2626 unsigned int inputHeight = 8;
2627 unsigned int inputWidth = 16;
2628 unsigned int inputChannels = 3;
2629 unsigned int inputNum = 5;
2630
2631 unsigned int kernelHeight = 3;
2632 unsigned int kernelWidth = 3;
2633 unsigned int channelMultiplier = 1;
2634
2635 unsigned int strideX = 2;
2636 unsigned int strideY = 3;
2637 unsigned int padX = 1;
2638 unsigned int padY = 1;
2639
2640 unsigned int outputNum = inputNum;
2641 unsigned int outputChannels = inputChannels * channelMultiplier;
2642 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2643 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
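    // i.e. outputHeight = (8 + 2*1 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2*1 - 3 + 2) / 2 = 8,
    // using integer division.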
2644
2645 armnn::TensorInfo inputTensorInfo;
2646 armnn::TensorInfo outputTensorInfo;
2647 armnn::TensorInfo kernelDesc;
2648 armnn::TensorInfo biasDesc;
2649
2650
2651 std::vector<unsigned int> inputShape;
2652 std::vector<unsigned int> outputShape;
2653 std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2654 std::vector<unsigned int> biasShape{ outputChannels };
2655 switch (layout.GetDataLayout())
2656 {
2657 case armnn::DataLayout::NCHW:
2658 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2659 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2660 break;
2661        case armnn::DataLayout::NHWC:
2662 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2663 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2664 break;
2665 default:
2666 throw armnn::InvalidArgumentException("unknown data layout ["
2667 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2668 }
2669
2670 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2671 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2672 int32_t qOffset = 0;
2673
2674 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2675 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2676 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2677 biasDesc = armnn::TensorInfo(
2678 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2679
2680 LayerTestResult<T, 4> ret(outputTensorInfo);
2681
2682 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2683 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2684 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2685 biasDesc, 1028, 0.0f, 255.0f);
2686
2687 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2688 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2689
2690 armnn::DepthwiseConvolution2dQueueDescriptor data;
2691 armnn::WorkloadInfo info;
2692 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2693 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2694
2695 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2696 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2697
2698 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2699 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2700 data.m_Weight = &weightsTensor;
2701 data.m_Bias = &biasTensor;
2702 data.m_Parameters.m_StrideX = strideX;
2703 data.m_Parameters.m_StrideY = strideY;
2704 data.m_Parameters.m_PadLeft = padX;
2705 data.m_Parameters.m_PadRight = padX;
2706 data.m_Parameters.m_PadTop = padY;
2707 data.m_Parameters.m_PadBottom = padY;
2708 data.m_Parameters.m_BiasEnabled = true;
2709 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2710
2711 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2712 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2713
2714 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2715 armnn::WorkloadInfo refInfo = info;
2716 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2717 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2718
2719 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2720 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2721
2722 outputHandleRef->Allocate();
2723 inputHandleRef->Allocate();
2724
2725 inputHandle->Allocate();
2726 outputHandle->Allocate();
2727
2728 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2729 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2730
2731 ExecuteWorkload(*workload, memoryManager);
2732
2733 workloadRef->PostAllocationConfigure();
2734 workloadRef->Execute();
2735
2736 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2737 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2738
2739 return ret;
2740}
2741
2742//
2743 // Explicit template instantiations
2744//
2745
2746template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2747Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2748 armnn::IWorkloadFactory&,
2749 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2750 bool,
2751 armnn::DataLayout);
2752
2753template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2754Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2755 armnn::IWorkloadFactory&,
2756 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2757 bool,
2758 armnn::DataLayout);
2759
2760template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2761Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2762 armnn::IWorkloadFactory&,
2763 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2764 bool,
2765 armnn::DataLayout);
2766
2767template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2768Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2769 armnn::IWorkloadFactory&,
2770 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2771 bool,
2772 armnn::DataLayout);
2773
2774template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2775Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2776 armnn::IWorkloadFactory&,
2777 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2778 bool,
2779 armnn::DataLayout);
2780
2781template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2782Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2783 armnn::IWorkloadFactory&,
2784 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2785 bool,
2786 armnn::DataLayout);
2787
2788template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2789Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2790 armnn::IWorkloadFactory &workloadFactory,
2791 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2792 bool biasEnabled,
2793 const armnn::DataLayout layout);
2794
2795template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2796Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2797 armnn::IWorkloadFactory &workloadFactory,
2798 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2799 bool biasEnabled,
2800 const armnn::DataLayout layout);
2801
2802template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2803Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2804 armnn::IWorkloadFactory &workloadFactory,
2805 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2806 bool biasEnabled,
2807 const armnn::DataLayout layout);
2808
2809template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2810DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2811 armnn::IWorkloadFactory&,
2812 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2813 bool,
2814 armnn::DataLayout);
2815
2816template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2817DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2818 armnn::IWorkloadFactory&,
2819 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2820 bool,
2821 armnn::DataLayout);
2822
2823template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2824DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2825 armnn::IWorkloadFactory&,
2826 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2827 bool,
2828 armnn::DataLayout);
2829
2830template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2831DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2832 armnn::IWorkloadFactory&,
2833 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2834 bool,
2835 armnn::DataLayout);
2836
2837template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2838DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2839 armnn::IWorkloadFactory&,
2840 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2841 bool,
2842 armnn::DataLayout);
2843
2844template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2845DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2846 armnn::IWorkloadFactory&,
2847 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2848 bool,
2849 armnn::DataLayout);
2850
2851template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2852DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2853 armnn::IWorkloadFactory &workloadFactory,
2854 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2855 bool biasEnabled,
2856 const armnn::DataLayout layout);
2857
2858template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2859DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2860 armnn::IWorkloadFactory &workloadFactory,
2861 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2862 bool biasEnabled,
2863 const armnn::DataLayout layout);
2864
2865//
2866// Implementation functions
2867//
2868
2869LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
2870 armnn::IWorkloadFactory& workloadFactory,
2871 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2872 bool biasEnabled,
2873 const armnn::DataLayout layout)
2874{
2875 return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2876 workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2877}
2878
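// For illustration only: a backend unit test would typically wrap one of the functions in this
// section roughly as sketched below (hypothetical; the real test suites register these through
// their own macros and workload factories):
//
//     BOOST_AUTO_TEST_CASE(SimpleConvolution2d3x5)
//     {
//         armnn::RefWorkloadFactory workloadFactory;
//         LayerTestResult<float, 4> result =
//             SimpleConvolution2d3x5Test(workloadFactory, nullptr, true, armnn::DataLayout::NCHW);
//         BOOST_TEST(CompareTensors(result.output, result.outputExpected));
//     }
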
2879LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
2880 armnn::IWorkloadFactory& workloadFactory,
2881 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2882 bool biasEnabled,
2883 const armnn::DataLayout layout)
2884{
2885 return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2886 workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2887}
2888
2889LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
2890 armnn::IWorkloadFactory& workloadFactory,
2891 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2892 bool biasEnabled,
2893 const armnn::DataLayout layout)
2894{
2895 return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2896 workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2897}
2898
2899LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
2900 armnn::IWorkloadFactory& workloadFactory,
2901 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2902 bool biasEnabled)
2903{
2904 return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
2905 workloadFactory,
2906 memoryManager,
2907 0.f,
2908 0,
2909 biasEnabled,
2910 armnn::DataLayout::NHWC);
2911}
2912
2913LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
2914 armnn::IWorkloadFactory& workloadFactory,
2915 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2916 bool biasEnabled,
2917 const armnn::DataLayout layout)
2918{
2919 return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
2920 workloadFactory,
2921 memoryManager,
2922 0.f,
2923 0,
2924 biasEnabled,
2925 layout);
2926}
2927
2928LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
2929 armnn::IWorkloadFactory& workloadFactory,
2930 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2931 bool biasEnabled,
2932 const armnn::DataLayout layout)
2933{
2934 return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2935 workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2936}
2937
2938LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
2939 armnn::IWorkloadFactory& workloadFactory,
2940 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2941 bool biasEnabled,
2942 const armnn::DataLayout layout)
2943{
2944 return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2945 workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2946}
2947
2948LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
2949 armnn::IWorkloadFactory& workloadFactory,
2950 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2951 bool biasEnabled,
2952 const armnn::DataLayout layout)
2953{
2954 return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2955 workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2956}
2957
2958LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
2959 armnn::IWorkloadFactory& workloadFactory,
2960 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2961 armnn::DataLayout layout)
2962{
2963 return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2964 workloadFactory, memoryManager, layout, 0.0f, 0);
2965}
2966
2967LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
2968 armnn::IWorkloadFactory& workloadFactory,
2969 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2970 armnn::DataLayout layout)
2971{
2972 return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
2973 <armnn::DataType::Float32, armnn::DataType::Float32>(
2974 workloadFactory, memoryManager, layout, 0.0f, 0);
2975}
2976
2977LayerTestResult<float, 4> Convolution1dTest(
2978 armnn::IWorkloadFactory& workloadFactory,
2979 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2980 bool biasEnabled)
2981{
2982 return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
2983 workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
2984}
2985
2986LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
2987 armnn::IWorkloadFactory& workloadFactory,
2988 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2989 bool biasEnabled)
2990{
2991 return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2992 workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
2993}
2994
2995LayerTestResult<float,4> CompareConvolution2dTest(
2996 armnn::IWorkloadFactory& workloadFactory,
2997 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2998 armnn::IWorkloadFactory& refWorkloadFactory)
2999{
3000 return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3001 workloadFactory, memoryManager, refWorkloadFactory);
3002}
3003
3004LayerTestResult<float, 4> DepthwiseConvolution2dTest(
3005 armnn::IWorkloadFactory& workloadFactory,
3006 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3007 bool biasEnabled,
3008 const armnn::DataLayout layout)
3009{
3010 return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3011 workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3012}
3013
3014LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
3015 armnn::IWorkloadFactory& workloadFactory,
3016 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3017 bool biasEnabled)
3018{
3019 return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3020 workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3021}
3022
3023LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
3024 armnn::IWorkloadFactory& workloadFactory,
3025 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3026 bool biasEnabled,
3027 const armnn::DataLayout layout)
3028{
3029 return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3030 workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3031}
3032
3033LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
3034 armnn::IWorkloadFactory& workloadFactory,
3035 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3036{
3037 armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3038 auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
3039
3040 std::vector<float> kernelData;
3041 std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3042 for (unsigned int i = 0; i < 64; ++i)
3043 {
3044 kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3045 }
3046 armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3047 auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
3048
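    // Every output channel sees the same 2x2 input, so each of the 64 expected values is
    // 1*1 + (-1)*2 + (-1)*3 + 1*4 = 0.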
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<float, 1>(),
        expectedOutput,
        0.f,
        0,
        armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

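// Quantized (uint8) depthwise variants pass a quantization scale of 0.5 and offset of 50
// to the common test implementations.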
LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

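// 3x3 depthwise convolution with a 3x3 dilation, using the NHWC data layout.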
LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        false);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

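// Comparison tests: run the same depthwise convolution through both the backend under test and
// refWorkloadFactory, then check that the two results match.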
LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}