//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <DataLayoutIndexed.hpp>
#include <Permute.hpp>
#include <QuantizeHelper.hpp>
#include <TensorUtils.hpp>

#include <armnn/ArmNN.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>

#include <backendsCommon/test/DataLayoutUtils.hpp>
#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <boost/numeric/conversion/cast.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

static std::vector<float> Bias4({1, 2, 3, 4});

static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});
65
using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 (or an empty vector) depending on the number of
// output channels and on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}
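
// Illustrative example only (not called by the tests directly): for a Float32 output tensor of shape
// {1, 4, H, W} in NCHW, GetBias<armnn::DataType::Float32>(true, 1.0f, outputInfo, armnn::DataLayout::NCHW)
// selects Bias4 because the channels dimension holds 4 elements; with biasEnabled == false an empty
// multi_array is returned instead.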
145
146//
147// Implementation templates
148//
149
150// Mapping from input type to bias type for fully connected layers.
151// float => float, uint8_t => int32_t
152template<typename T>
153struct FullyConnectedBiasTypeForInputType;
154
155template<>
156struct FullyConnectedBiasTypeForInputType<float>
157{
158 using Type = float;
159};
160
161template<>
162struct FullyConnectedBiasTypeForInputType<uint8_t>
163{
164 using Type = int32_t;
165};
166
167// Modifies a std::vector in-place using a specified bias.
168template<typename T, typename B>
169void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
170 const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
171{
172 BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
173 "Invalid type and parameter combination.");
174 BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
175 "Invalid type and parameter combination.");
176
177 // Note we need to dequantize and re-quantize the image value and the bias.
178 for (uint32_t i = 0; i < bias.size(); ++i)
179 {
180 float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
181 for (uint32_t y = 0; y < h; ++y)
182 {
183 for (uint32_t x = 0; x < w; ++x)
184 {
185 uint32_t offset = (i * h + y) * w + x;
186 BOOST_ASSERT(offset < v.size());
187 T& outRef = v[offset];
188 float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
189 outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
190 }
191 }
192 }
193}
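
// Typical invocation (mirrors the call made in SimpleConvolution2dTestImpl below): add a per-channel
// bias to an expected output image, dequantizing and re-quantizing with each tensor's own parameters:
//   ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
//             biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
//             outputWidth, outputHeight);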
194
195//
196// Convolution2d implementations
197//
198
199template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
200 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
201LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
202 armnn::IWorkloadFactory& workloadFactory,
203 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
204 const boost::multi_array<T, 4>& originalInput,
205 const boost::multi_array<T, 4>& originalKernel,
206 const boost::multi_array<B, 1>& bias,
207 const boost::multi_array<T, 4>& originalOutputExpected,
208 float qScale,
209 int32_t qOffset,
210 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
211 uint32_t padLeft = 0,
212 uint32_t padTop = 0,
213 uint32_t padRight = 0,
214 uint32_t padBottom = 0,
215 uint32_t strideX = 1,
216 uint32_t strideY = 1,
217 uint32_t dilationX = 1,
218 uint32_t dilationY = 1)
219{
220 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
221 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
222 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
223 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
224
225 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
226 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
227 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
228 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
229
230 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
231 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
232 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
233 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
234
235 bool biasEnabled = bias.size() > 0;
236
237 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
238 BOOST_ASSERT(inputNum == 1);
239 BOOST_ASSERT(outputNum == 1);
240
241 // If a bias is used, its size must equal the number of output channels.
242 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
243
244
245 // Note these tensors will use two (identical) batches.
246 armnn::TensorInfo inputTensorInfo =
247 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
248 armnn::TensorInfo outputTensorInfo =
249 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
250 armnn::TensorInfo kernelDesc =
251 armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
252 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
253
254 // Set quantization parameters if the requested type is a quantized type.
255 if(armnn::IsQuantizedType<T>())
256 {
257 inputTensorInfo.SetQuantizationScale(qScale);
258 inputTensorInfo.SetQuantizationOffset(qOffset);
259 outputTensorInfo.SetQuantizationScale(qScale);
260 outputTensorInfo.SetQuantizationOffset(qOffset);
261 kernelDesc.SetQuantizationScale(qScale);
262 kernelDesc.SetQuantizationOffset(qOffset);
263 biasDesc.SetQuantizationScale(qScale*qScale);
264 biasDesc.SetQuantizationOffset(0);
265 }
266
267 LayerTestResult<T, 4> ret(outputTensorInfo);
268
269 // Construct input data - two batches of the same input image.
270 std::vector<T> inputImage;
271 inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
272 std::vector<T> inputData;
273 inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
274 inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
275
    // Permute the input data to NHWC if that layout has been requested.
277 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
278 if (layout == armnn::DataLayout::NHWC)
279 {
280 std::vector<T> tmp(inputData.size());
281 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
282 inputData = tmp;
283 }
284
285 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
286
287 std::vector<T> outputImage;
288 outputImage.assign(originalOutputExpected.data(),
289 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
290
291 // Apply bias to output image if it is enabled.
292 if(biasEnabled)
293 {
294 std::vector<T> biasV;
295 biasV.assign(bias.data(), bias.data() + outputChannels);
296 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
297 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
298 outputWidth, outputHeight);
299 }
300
301 // Construct expected output data - two identical images.
302 std::vector<T> outputData;
303 outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
304 outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
305
    // Permute the expected output to NHWC if that layout has been requested.
307 if (layout == armnn::DataLayout::NHWC)
308 {
309 std::vector<T> tmp(outputData.size());
310 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
311 outputData = tmp;
312 }
313 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
314
315 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
316 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
317
318 armnn::Convolution2dQueueDescriptor data;
319 armnn::WorkloadInfo info;
320 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
321 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
322 // Permute the kernel if necessary
323 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
324 if (layout == armnn::DataLayout::NHWC)
325 {
326 armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
327 }
328 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
329
330 if(biasEnabled)
331 {
332 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
333 }
334
335 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
336 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
337
338 data.m_Weight = &weightsTensor;
339 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
340 data.m_Parameters.m_StrideX = strideX;
341 data.m_Parameters.m_StrideY = strideY;
342 data.m_Parameters.m_PadLeft = padLeft;
343 data.m_Parameters.m_PadRight = padRight;
344 data.m_Parameters.m_PadTop = padTop;
345 data.m_Parameters.m_PadBottom = padBottom;
346 data.m_Parameters.m_BiasEnabled = biasEnabled;
347 data.m_Parameters.m_DataLayout = layout;
348 data.m_Parameters.m_DilationX = dilationX;
349 data.m_Parameters.m_DilationY = dilationY;
350
351 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
352 inputHandle->Allocate();
353 outputHandle->Allocate();
354
355 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
356
357 ExecuteWorkload(*workload, memoryManager);
358
359 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
360
361 return ret;
362}
363
364template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
365 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
366LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
367 armnn::IWorkloadFactory& workloadFactory,
368 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
369 const boost::multi_array<T, 4>& input,
370 const boost::multi_array<T, 4>& kernel,
371 const boost::multi_array<B, 1>& bias,
372 const boost::multi_array<T, 4>& outputExpected,
373 const armnn::DataLayout dataLayout,
374 float qScale,
375 int32_t qOffset,
376 uint32_t padLeft = 1,
377 uint32_t padTop = 1,
378 uint32_t padRight = 1,
379 uint32_t padBottom = 1,
380 uint32_t strideX = 1,
381 uint32_t strideY = 1)
382{
383 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
384 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
385 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
386 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
387
388 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
389 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
390 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
391 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
392
393 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
394 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
395 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
396 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
397
398 bool biasEnabled = bias.size() > 0;
399
400 // Creates the tensors.
401 armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
402 armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
403 ArmnnType);
404 armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
405 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
406
407 // Construct the input data.
408 std::vector<T> inputData;
409 inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
410 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
411
412 // Construct the output data, with bias applied, as appropriate.
413 std::vector<T> outputData;
414 outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
415
416 LayerTestResult<T, 4> ret(outputTensorInfo);
417 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
418
419 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
420 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
421
422 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
423 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
424
425 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
426
427 armnn::Convolution2dQueueDescriptor data;
428
429 data.m_Weight = &weightsTensor;
430 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
431 data.m_Parameters.m_StrideX = strideX;
432 data.m_Parameters.m_StrideY = strideY;
433 data.m_Parameters.m_PadLeft = padLeft;
434 data.m_Parameters.m_PadRight = padRight;
435 data.m_Parameters.m_PadTop = padTop;
436 data.m_Parameters.m_PadBottom = padBottom;
437 data.m_Parameters.m_BiasEnabled = biasEnabled;
438 data.m_Parameters.m_DataLayout = dataLayout;
439
440 armnn::WorkloadInfo info;
441 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
442 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
443
444 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
445 inputHandle->Allocate();
446 outputHandle->Allocate();
447
448 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
449
450 ExecuteWorkload(*workload, memoryManager);
451
452 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
453
454 return ret;
455}
456
457template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
458LayerTestResult<T,4> Convolution1dTestImpl(
459 armnn::IWorkloadFactory& workloadFactory,
460 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
461 float qScale,
462 int32_t qOffset,
463 bool biasEnabled)
464{
465 using B = armnn::ResolveType<ArmnnBType>;
466 // Until we have a specialist 1D convolution layer, we can fake one using
467 // 2D convolution with the final dimension set to 1.
468 // I don't anticipate this being particularly slow, given that convolution is implemented
469 // as a matrix multiplication, at which point dimension doesn't matter.
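    //
    // Sketch of the mapping used below (the numbers are this test's own sizes):
    //   1D input  [N=1, C=2, L=5]     -> NCHW tensor {1, 2, 5, 1}
    //   1D kernel [O=3, I=2, K=3]     -> NCHW tensor {3, 2, 3, 1}
    //   1D output [N=1, C=3, Lout=7]  -> NCHW tensor {1, 3, 7, 1}
    // Stride and padding are applied along the "height" axis only (m_StrideY, m_PadTop/m_PadBottom).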
470
471 unsigned int batchSize = 1;
472 unsigned int inputChannels = 2;
473 unsigned int outputChannels = 3;
474 unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
475 unsigned int kernelSize = 3;
476 unsigned int padSize = 2;
477 unsigned int stride = 1;
478 unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
479
480 armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
481 armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
482 armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
483 armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
484
485 // Set quantization parameters if the requested type is a quantized type.
486 if(armnn::IsQuantizedType<T>())
487 {
488 inputInfo.SetQuantizationScale(qScale);
489 inputInfo.SetQuantizationOffset(qOffset);
490 outputInfo.SetQuantizationScale(qScale);
491 outputInfo.SetQuantizationOffset(qOffset);
492 kernelInfo.SetQuantizationScale(qScale);
493 kernelInfo.SetQuantizationOffset(qOffset);
494 biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
495 biasInfo.SetQuantizationOffset(0);
496 }
497
    std::vector<T> inputData = QuantizedVector<T>(
        {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

532 // Optionally apply bias to output image.
533 if(biasEnabled)
534 {
535 ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
536 biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
537 1, outputSize);
538 }
539
540 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
541 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
542
543 armnn::Convolution2dQueueDescriptor data;
544 armnn::WorkloadInfo info;
545 armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
546 armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
547
548 AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
549 AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
550
551 AddInputToWorkload(data, info, inputInfo, inputHandle.get());
552 AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
553
554 data.m_Weight = &weightsTensor;
555 data.m_Bias = &biasTensor;
556 data.m_Parameters.m_StrideX = 1;
557 data.m_Parameters.m_StrideY = stride;
558 data.m_Parameters.m_PadLeft = 0;
559 data.m_Parameters.m_PadRight = 0;
560 data.m_Parameters.m_PadTop = padSize;
561 data.m_Parameters.m_PadBottom = padSize;
562 data.m_Parameters.m_BiasEnabled = biasEnabled;
563
564 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
565 inputHandle->Allocate();
566 outputHandle->Allocate();
567
568 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
569
570 ExecuteWorkload(*workload, memoryManager);
571
572 // Output
573 LayerTestResult<T,4> ret(outputInfo);
574 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
575 ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
576 return ret;
577}
578
579template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
580LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
581 armnn::IWorkloadFactory& workloadFactory,
582 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
583 float qScale,
584 int32_t qOffset,
585 bool biasEnabled,
586 armnn::DataLayout dataLayout)
587{
    // Use a single-batch 1-channel 3x4 input image (height 3, width 4) in NHWC layout.
589
590 armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
591 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
592 {
593 1, 5, 2, 3,
594 8, 7, 3, 6,
595 3, 3, 9, 1
596 });
597
598
    // Use a single 1-channel 3x3 kernel.
600 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
601 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
602 4, 5, 6,
603 0, 0, 0,
604 3, 2, 1
605 });
606
    // Expected output is 1 batch of a 1-channel 3x4 image (height 3, width 4).
608 armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
609
610 const std::vector<float> outputData =
611 {
612 23, 41, 33, 21,
613 44, 65, 76, 52,
614 82, 85, 79, 42
615 };
616
617 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
618
619 return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
620 workloadFactory,
621 memoryManager,
622 input,
623 kernel,
624 boost::multi_array<T, 1>(),
625 expectedOutput,
626 dataLayout,
627 qScale,
628 qOffset);
629}
630
631template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
632LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
633 armnn::IWorkloadFactory& workloadFactory,
634 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
635 float qScale,
636 int32_t qOffset,
637 bool biasEnabled,
638 const armnn::DataLayout& dataLayout)
639{
640 // Input is a single-batch, 1 channel, 5x5 image.
641 armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
642 boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
643 {
644 1, 5, 2, 3, 5,
645 8, 7, 3, 6, 3,
646 3, 3, 9, 1, 9,
647 4, 1, 8, 1, 3,
648 6, 8, 1, 9, 2
649 });
650
651 // Use a 3x3 kernel.
652 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
653 boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
654 {
655 4, 5, 6,
656 0, 0, 0,
657 3, 2, 1
658 });
659
660 // Expected output is a single-batch, 1 channel, 3x3 image.
661 armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
662
663 const std::vector<T> outputData =
664 {
665 23, 33, 24,
666 91, 99, 48,
667 26, 50, 19
668 };
669
670 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
671
672 uint32_t padLeft = 1;
673 uint32_t padTop = 1;
674 uint32_t padRight = 1;
675 uint32_t padBottom = 1;
676 uint32_t strideX = 2;
677 uint32_t strideY = 2;
678
679 return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
680 workloadFactory,
681 memoryManager,
682 input,
683 kernel,
684 boost::multi_array<T, 1>(),
685 expectedOutput,
686 dataLayout,
687 qScale,
688 qOffset,
689 padLeft,
690 padTop,
691 padRight,
692 padBottom,
693 strideX,
694 strideY);
695}
696
697template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
698LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
699 armnn::IWorkloadFactory& workloadFactory,
700 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
701 float qScale,
702 int32_t qOffset,
703 bool biasEnabled,
704 const armnn::DataLayout layout)
705{
706 // Use common single-batch 3-channel 16x8 image.
707 armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

771 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
772 workloadFactory,
773 memoryManager,
774 input,
775 kernel,
776 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
777 expectedOutput,
778 qScale,
779 qOffset,
780 layout);
781}
782
783template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
784 typename T = armnn::ResolveType<ArmnnType>>
785LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
786 armnn::IWorkloadFactory& workloadFactory,
787 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
788 float qScale,
789 int32_t qOffset,
790 bool biasEnabled,
791 const armnn::DataLayout layout)
792{
793 // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
794
795 // Use common single-batch 3-channel 16x8 image.
796 armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

850 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
851 workloadFactory,
852 memoryManager,
853 input,
854 kernel,
855 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
856 expectedOutput,
857 qScale,
858 qOffset,
859 layout);
860}
861
862template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
863 typename T = armnn::ResolveType<ArmnnType>>
864LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
865 armnn::IWorkloadFactory& workloadFactory,
866 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
867 const armnn::DataLayout layout,
868 float qScale,
869 int32_t qOffset)
870{
871 // Use a single-batch 1-channel 3x3 image as input.
872 armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,
            12,22,32,
            13,23,33
        },
        qScale, qOffset)));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,
            -12,-22,
        },
        qScale, qOffset)));

890// Expected output is 1 batch of a 1-channel 6x8 image.
891// Manually calculated like this:
892//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
893//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
894//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
895//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
896//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
897//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
898//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
899 armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            0, 0, 0, 0, 0, 0,
            -242, -594, -934, -372, 0, 0,
            -495, -1190, -1850, -725, 0, 0,
            -538, -1256, -1916, -748, 0, 0,
            -273, -626, -946, -363, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

913 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
914 workloadFactory,
915 memoryManager,
916 input,
917 kernel,
918 GetBias2<ArmnnBType>(false, qScale * qScale),
919 expectedOutput,
920 qScale,
921 qOffset,
922 layout,
923 1, // Padding left.
924 2, // Padding top.
925 3, // Padding right.
926 4); // Padding bottom.
927}
928
929template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
930 typename T = armnn::ResolveType<ArmnnType>>
931LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
932 armnn::IWorkloadFactory& workloadFactory,
933 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
934 const armnn::DataLayout layout,
935 float qScale,
936 int32_t qOffset)
937{
938 // Use a single-batch 1-channel 5x5 image as input.
939 armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        }, qScale, qOffset)));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> myVec(outputDesc.GetNumElements(), 0);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -7140, -10580, -13940, -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144, -9318, -5152,
            -5032, -7256, -9376, -6142, -3368,
        },
        qScale, qOffset)));

973 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
974 workloadFactory,
975 memoryManager,
976 input,
977 kernel,
978 GetBias2<ArmnnBType>(false, qScale * qScale),
979 expectedOutput,
980 qScale,
981 qOffset,
982 layout,
983 1, // Padding left.
984 1, // Padding top.
985 2, // Padding right.
986 2); // Padding bottom.
987}
988
989template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
990LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
991 armnn::IWorkloadFactory& workloadFactory,
992 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
993 const std::vector<float>& inputNoQuantizedValues,
994 armnn::TensorInfo& inputTensorInfo,
995 const std::vector<float>& kernelNoQuantizedValues,
996 armnn::TensorInfo& kernelTensorInfo,
997 const std::vector<float>& outputExpectedNoQuantizedValues,
998 armnn::TensorInfo& outputTensorInfo,
999 uint32_t dilationX,
1000 uint32_t dilationY,
1001 armnn::DataLayout layout = armnn::DataLayout::NCHW,
1002 uint32_t padLeft = 0,
1003 uint32_t padTop = 0,
1004 uint32_t padRight = 0,
1005 uint32_t padBottom = 0,
1006 uint32_t strideX = 1,
1007 uint32_t strideY = 1,
1008 bool biasEnabled = false
1009)
1010{
1011 float qScale;
1012 int32_t qOffset;
1013 switch (ArmnnType)
1014 {
1015 case armnn::DataType::QuantisedAsymm8:
1016 {
1017 qScale = 0.1f;
1018 qOffset = 128;
1019 break;
1020 }
1021 case armnn::DataType::QuantisedSymm16:
1022 {
1023 qScale = 0.1f;
1024 qOffset = 0;
1025 break;
1026 }
1027 case armnn::DataType::Float32:
1028 default:
1029 {
1030 qScale = 0.f;
1031 qOffset = 0;
1032 break;
1033 }
1034 }
1035
1036 inputTensorInfo.SetQuantizationScale(qScale);
1037 inputTensorInfo.SetQuantizationOffset(qOffset);
1038 kernelTensorInfo.SetQuantizationScale(qScale);
1039 kernelTensorInfo.SetQuantizationOffset(qOffset);
1040 outputTensorInfo.SetQuantizationScale(qScale);
1041 outputTensorInfo.SetQuantizationOffset(qOffset);
1042
    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
                                                                    inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset())));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
                                                                     kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset())));
    auto expectedOutput =
        MakeTensor<T, 4>(outputTensorInfo,
                         std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                                           outputTensorInfo.GetQuantizationScale(),
                                                           outputTensorInfo.GetQuantizationOffset())));

1057 return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1058 workloadFactory,
1059 memoryManager,
1060 input,
1061 kernel,
1062 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1063 expectedOutput,
1064 qScale,
1065 qOffset,
1066 layout,
1067 padLeft,
1068 padTop,
1069 padRight,
1070 padBottom,
1071 strideX,
1072 strideY,
1073 dilationX,
1074 dilationY);
1075}
1076
1077template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1078LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
1079 armnn::IWorkloadFactory& workloadFactory,
1080 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1081 bool biasEnabled,
1082 const armnn::DataLayout layout)
1083{
1084 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1085 std::vector<float> inputNoQuantizedValues =
1086 {
1087 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1088 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1089 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1090 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1091 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1092 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1093 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1094 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1095 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1097 };
1098
1099 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1100 std::vector<float> kernelNoQuantizedValues =
1101 {
1102 1, 2, 3,
1103 4, 5, 6,
1104 7, 8, 9
1105 };
1106
    // Since the dilation rate is 3, the effective kernel size grows to 7x7: d*(K-1)+1 = 3*(3-1)+1 = 7.
    // The output is therefore 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
1109 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1110 std::vector<float> outputExpectedNoQuantizedValues =
1111 {
1112 6., 5., 5., 5.,
1113 6., 5., 5., 5.,
1114 6., 5., 5., 5.,
1115 3., 2., 2., 2.
1116 };
1117
1118 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1119 workloadFactory,
1120 memoryManager,
1121 inputNoQuantizedValues,
1122 inputTensorInfo,
1123 kernelNoQuantizedValues,
1124 kernelTensorInfo,
1125 outputExpectedNoQuantizedValues,
1126 outputTensorInfo,
1127 3,
1128 3,
1129 layout,
1130 biasEnabled);
1131}
1132
1133template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1134LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
1135 armnn::IWorkloadFactory& workloadFactory,
1136 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1137 bool biasEnabled,
1138 const armnn::DataLayout layout)
1139{
1140 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1141 std::vector<float> inputNoQuantizedValues =
1142 {
1143 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1145 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1146 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1147 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1148 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1149 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1150 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1151 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1152 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1153
1154 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1156 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1157 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1158 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1159 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1160 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1161 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1162 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1163 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1164 };
1165
1166 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1167 std::vector<float> kernelNoQuantizedValues =
1168 {
1169 1, 2, 3,
1170 4, 5, 6,
1171 7, 8, 9,
1172
1173 1, 2, 3,
1174 4, 5, 6,
1175 7, 8, 9
1176 };
1177
    // Since the dilation rate is 3, the effective kernel size grows to 7x7: d*(K-1)+1 = 3*(3-1)+1 = 7.
    // The output is therefore 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
1180 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1181 std::vector<float> outputExpectedNoQuantizedValues =
1182 {
1183 12., 10., 10., 10.,
1184 12., 10., 10., 10.,
1185 12., 10., 10., 10.,
1186 6., 4., 4., 4.
1187 };
1188
1189 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1190 workloadFactory,
1191 memoryManager,
1192 inputNoQuantizedValues,
1193 inputTensorInfo,
1194 kernelNoQuantizedValues,
1195 kernelTensorInfo,
1196 outputExpectedNoQuantizedValues,
1197 outputTensorInfo,
1198 3,
1199 3,
1200 layout,
1201 biasEnabled);
1202}
1203
1204template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1205LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
1206 armnn::IWorkloadFactory &workloadFactory,
1207 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
1208 bool biasEnabled,
1209 const armnn::DataLayout layout)
1210{
1211 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1212 std::vector<float> inputNoQuantizedValues =
1213 {
1214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1215 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1217 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1220 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1222 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1223 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1224 };
1225
1226 armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1227 std::vector<float> kernelNoQuantizedValues =
1228 {
1229 1, 2,
1230 3, 4
1231 };
1232
    // Since the dilation rate is 2 the effective kernel size grows to 3x3: d*(K-1)+1 = 2*(2-1)+1 = 3.
    // The output is therefore 4x4: trunc((I + 2P - K_eff)/S) + 1 => trunc((10 + 2*1 - 3)/3) + 1 = 4,
    // where dilation d = 2, kernel K = 2, input I = 10, padding per side P = 1 and stride S = 3.
1236 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1237 std::vector<float> outputExpectedNoQuantizedValues =
1238 {
1239 4, 7, 7, 3,
1240 6, 10, 10, 4,
1241 6, 10, 10, 4,
1242 2, 3, 3, 1
1243 };
1244 uint32_t padLeft = 1;
1245 uint32_t padTop = 1;
1246 uint32_t padRight = 1;
1247 uint32_t padBottom = 1;
1248
1249 return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1250 workloadFactory,
1251 memoryManager,
1252 inputNoQuantizedValues,
1253 inputTensorInfo,
1254 kernelNoQuantizedValues,
1255 kernelTensorInfo,
1256 outputExpectedNoQuantizedValues,
1257 outputTensorInfo,
1258 2,
1259 2,
1260 layout,
1261 padLeft,
1262 padTop,
1263 padRight,
1264 padBottom,
1265 3,
1266 3,
1267 biasEnabled
1268 );
1269}
1270
1271template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1272LayerTestResult<T,4> CompareConvolution2dTestImpl(
1273 armnn::IWorkloadFactory& workloadFactory,
1274 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1275 armnn::IWorkloadFactory& refWorkloadFactory)
1276{
1277 unsigned int inputHeight = 8;
1278 unsigned int inputWidth = 16;
1279 unsigned int inputChannels = 3;
1280 unsigned int inputNum = 5;
1281
1282 unsigned int kernelHeight = 3;
1283 unsigned int kernelWidth = 3;
1284
1285 unsigned int strideX = 2;
1286 unsigned int strideY = 3;
1287 unsigned int padX = 1;
1288 unsigned int padY = 1;
1289
1290 unsigned int outputNum = inputNum;
1291 unsigned int outputChannels = 2;
1292 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1293 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
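    // With the sizes above these expressions evaluate (using integer division) to:
    //   outputHeight = (8 + 2*1 - 3 + 3) / 3 = 10 / 3 = 3
    //   outputWidth  = (16 + 2*1 - 3 + 2) / 2 = 17 / 2 = 8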
1294
1295 armnn::TensorInfo inputTensorInfo;
1296 armnn::TensorInfo outputTensorInfo;
1297 armnn::TensorInfo kernelDesc;
1298 armnn::TensorInfo biasDesc;
1299
1300 unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1301 unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1302 unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1303 unsigned int biasShape[] = {outputChannels};
1304
1305 inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1306 outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1307 kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1308 biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1309
1310 LayerTestResult<T,4> ret(outputTensorInfo);
1311
1312 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1313 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1314 auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1315
1316 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1317 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1318
1319 armnn::Convolution2dQueueDescriptor data;
1320 armnn::WorkloadInfo info;
1321 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1322 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1323
1324 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1325 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1326
1327 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1328 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1329 data.m_Weight = &weightsTensor;
1330 data.m_Bias = &biasTensor;
1331 data.m_Parameters.m_StrideX = strideX;
1332 data.m_Parameters.m_StrideY = strideY;
1333 data.m_Parameters.m_PadLeft = padX;
1334 data.m_Parameters.m_PadRight = padX;
1335 data.m_Parameters.m_PadTop = padY;
1336 data.m_Parameters.m_PadBottom = padY;
1337 data.m_Parameters.m_BiasEnabled = true;
1338
1339 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1340 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1341
1342 armnn::Convolution2dQueueDescriptor refData = data;
1343 armnn::WorkloadInfo refInfo = info;
1344 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1345 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1346
1347 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1348 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1349
1350 outputHandleRef->Allocate();
1351 inputHandleRef->Allocate();
1352
1353 inputHandle->Allocate();
1354 outputHandle->Allocate();
1355
1356 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1357 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1358
1359 ExecuteWorkload(*workload, memoryManager);
1360
1361 workloadRef->PostAllocationConfigure();
1362 workloadRef->Execute();
1363
1364 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1365 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1366
1367 return ret;
1368}
1369
1370//
1371// DepthwiseConvolution2d implementations
1372//
1373
1374template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1375 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1376LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1377 armnn::IWorkloadFactory& workloadFactory,
1378 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1379 const boost::multi_array<T, 4>& input,
1380 const boost::multi_array<T, 4>& kernel,
1381 const boost::multi_array<B, 1>& bias,
1382 const boost::multi_array<T, 4>& outputExpected,
1383 float qScale,
1384 int32_t qOffset,
1385 const armnn::DataLayout layout,
1386 uint32_t padLeft = 0,
1387 uint32_t padTop = 0,
1388 uint32_t padRight = 0,
1389 uint32_t padBottom = 0,
1390 uint32_t strideX = 1,
1391 uint32_t strideY = 1)
1392{
1393 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1394 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1395 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1396 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1397 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1398 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1399 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1400 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1401 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1402 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1403 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1404 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1405
1406 // If a bias is used, its size must equal the number of output channels.
1407 bool biasEnabled = bias.size() > 0;
1408 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1409
1410 // Creates the tensors.
1411 armnn::TensorInfo inputTensorInfo =
1412 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1413 armnn::TensorInfo outputTensorInfo =
1414 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1415 armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1416 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1417
1418 // Set quantization parameters if the requested type is a quantized type.
1419 if (armnn::IsQuantizedType<T>())
1420 {
1421 inputTensorInfo.SetQuantizationScale(qScale);
1422 inputTensorInfo.SetQuantizationOffset(qOffset);
1423 outputTensorInfo.SetQuantizationScale(qScale);
1424 outputTensorInfo.SetQuantizationOffset(qOffset);
1425 kernelDesc.SetQuantizationScale(qScale);
1426 kernelDesc.SetQuantizationOffset(qOffset);
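        // Note: this follows the usual quantization convention biasScale = inputScale * weightScale;
        // both scales equal qScale in these tests, hence qScale * qScale below.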
1427 biasDesc.SetQuantizationScale(qScale*qScale);
1428 biasDesc.SetQuantizationOffset(0);
1429 }
1430
1431 // Construct the input data.
1432 std::vector<T> inputData;
1433 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1434
1435     // Permute the input data to NHWC at this point, if that layout was requested.
1436 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
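    // (Each entry of the permutation vector is the destination axis of the corresponding source axis,
    //  so { 0, 3, 1, 2 } sends N->0, C->3, H->1, W->2, i.e. it rearranges NCHW data into NHWC.)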
1437 if (layout == armnn::DataLayout::NHWC)
1438 {
1439 std::vector<T> tmp(inputData.size());
1440 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1441 inputData = tmp;
1442 }
1443
1444 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1445
1446 // Construct the output data, with bias applied, as appropriate.
1447 std::vector<T> outputData;
1448 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1449 if (biasEnabled)
1450 {
1451 std::vector<T> biasV;
1452 biasV.assign(bias.data(), bias.data() + outputChannels);
1453 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1454 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1455 outputWidth, outputHeight);
1456 }
1457
1458 LayerTestResult<T, 4> ret(outputTensorInfo);
1459
1460     // Permute the expected output to NHWC at this point, if that layout was requested.
1461 if (layout == armnn::DataLayout::NHWC)
1462 {
1463 std::vector<T> tmp(outputData.size());
1464 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1465 outputData = tmp;
1466 }
1467
1468 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1469
1470 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1471 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1472
1473 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1474
1475 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1476
1477 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1478 if (biasEnabled)
1479 {
1480 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1481 }
1482
1483 armnn::DepthwiseConvolution2dQueueDescriptor data;
1484 data.m_Weight = &weightsTensor;
1485 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
1486 data.m_Parameters.m_StrideX = strideX;
1487 data.m_Parameters.m_StrideY = strideY;
1488 data.m_Parameters.m_PadLeft = padLeft;
1489 data.m_Parameters.m_PadRight = padRight;
1490 data.m_Parameters.m_PadTop = padTop;
1491 data.m_Parameters.m_PadBottom = padBottom;
1492 data.m_Parameters.m_BiasEnabled = biasEnabled;
1493 data.m_Parameters.m_DataLayout = layout;
1494
1495 armnn::WorkloadInfo info;
1496 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1497 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1498
1499 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1500 inputHandle->Allocate();
1501 outputHandle->Allocate();
1502
1503 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1504
1505 ExecuteWorkload(*workload, memoryManager);
1506
1507 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1508
1509 return ret;
1510}
1511
1512template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1513LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1514 armnn::IWorkloadFactory& workloadFactory,
1515 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1516 float qScale,
1517 int32_t qOffset,
1518 bool biasEnabled,
1519 const armnn::DataLayout layout)
1520{
1521 using B = armnn::ResolveType<ArmnnBType>;
1522
1523 unsigned int inputHeight = 3;
1524 unsigned int inputWidth = 3;
1525 unsigned int inputChannels = 2;
1526 unsigned int inputNum = 1;
1527
1528 unsigned int kernelHeight = 3;
1529 unsigned int kernelWidth = 3;
1530 unsigned int kernelChannels = inputChannels;
1531 unsigned int kernelDepthMultiplier = 1;
1532
1533 unsigned int outputHeight = 1;
1534 unsigned int outputWidth = 1;
1535 unsigned int outputChannels = kernelChannels;
1536 unsigned int outputNum = inputNum;
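    // With a depth multiplier of 1 the output channel count equals the input channel count (2), and a
    // 3x3 kernel over a 3x3 input with no padding and stride 1 yields a single 1x1 output position.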
1537
1538 armnn::TensorInfo inputTensorInfo =
1539 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1540 armnn::TensorInfo outputTensorInfo =
1541 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1542 armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1543 ArmnnType);
1544 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1545
1546 // Set quantization parameters if the requested type is a quantized type.
1547 if(armnn::IsQuantizedType<T>())
1548 {
1549 inputTensorInfo.SetQuantizationScale(qScale);
1550 inputTensorInfo.SetQuantizationOffset(qOffset);
1551 outputTensorInfo.SetQuantizationScale(qScale);
1552 outputTensorInfo.SetQuantizationOffset(qOffset);
1553 kernelDesc.SetQuantizationScale(qScale);
1554 kernelDesc.SetQuantizationOffset(qOffset);
1555 biasDesc.SetQuantizationScale(qScale*qScale);
1556 biasDesc.SetQuantizationOffset(0);
1557 }
1558 std::vector<T> inputData = std::vector<T>(
1559         QuantizedVector<T>({
1560 1.f, 2.f, 1.f,
1561 2.f, 1.f, 2.f,
1562 1.f, 2.f, 1.f,
1563
1564             1.f, 2.f, 1.f,
1565 2.f, 1.f, 2.f,
1566 1.f, 2.f, 1.f,
1567 },
1568 inputTensorInfo.GetQuantizationScale(),
1569 inputTensorInfo.GetQuantizationOffset()));
1570
1571     // Permute the input data to NHWC at this point, if that layout was requested.
1572 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1573 if (layout == armnn::DataLayout::NHWC)
1574 {
1575 std::vector<T> tmp(inputData.size());
1576 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1577 inputData = tmp;
1578 }
1579 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1580
1581     std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1582                                              biasDesc.GetQuantizationScale(),
1583                                              biasDesc.GetQuantizationOffset()));
1584
1585     auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1586
1587 std::vector<T> kernelData = std::vector<T>(
1588         QuantizedVector<T>({
1589             1.f, 0.f, 1.f,
1590             0.f, 0.f, 0.f,
1591            -1.f, 0.f, -1.f,
1592
1593             1.f, 0.f, 1.f,
1594 0.f, 0.f, 0.f,
1595 -1.f, 0.f, -1.f,
1596 },
1597 kernelDesc.GetQuantizationScale(),
1598 kernelDesc.GetQuantizationOffset()));
1599
1600     auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1601
1602     // Manually calculated.
1603     std::vector<T> outputImage(
1604         QuantizedVector<T>({ 0.f, 0.f },
1605                            outputTensorInfo.GetQuantizationScale(),
1606                            outputTensorInfo.GetQuantizationOffset())
1607     );
1608
1609 // Optionally apply bias to output image.
1610 if(biasEnabled)
1611 {
1612 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1613 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1614 outputWidth, outputHeight);
1615 }
1616
1617 LayerTestResult<T, 4> ret(outputTensorInfo);
1618 if (layout == armnn::DataLayout::NHWC)
1619 {
1620 std::vector<T> tmp(outputImage.size());
1621 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1622 outputImage = tmp;
1623 }
1624
1625 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1626
1627 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1628 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1629
1630 armnn::DepthwiseConvolution2dQueueDescriptor data;
1631 armnn::WorkloadInfo info;
1632 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1633 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1634
1635 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1636 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1637
1638 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1639 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1640
1641 data.m_Weight = &weightsTensor;
1642 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1643 data.m_Parameters.m_StrideX = 1;
1644 data.m_Parameters.m_StrideY = 1;
1645 data.m_Parameters.m_PadLeft = 0;
1646 data.m_Parameters.m_PadRight = 0;
1647 data.m_Parameters.m_PadTop = 0;
1648 data.m_Parameters.m_PadBottom = 0;
1649 data.m_Parameters.m_BiasEnabled = biasEnabled;
1650 data.m_Parameters.m_DataLayout = layout;
1651
1652 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1653 inputHandle->Allocate();
1654 outputHandle->Allocate();
1655
1656 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1657
1658 ExecuteWorkload(*workload, memoryManager);
1659
1660 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1661
1662 return ret;
1663}
1664
1665template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1666LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1667 armnn::IWorkloadFactory& workloadFactory,
1668 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1669 float qScale,
1670 int32_t qOffset,
1671 bool biasEnabled,
1672 const armnn::DataLayout layout)
1673{
1674 using B = armnn::ResolveType<ArmnnBType>;
1675
1676 unsigned int depthMultiplier = 2;
1677
1678 unsigned int inputHeight = 8;
1679 unsigned int inputWidth = 16;
1680 unsigned int inputChannels = 2;
1681 unsigned int inputBatchSize = 1;
1682
1683 unsigned int kernelHeight = 5;
1684 unsigned int kernelWidth = 3;
1685
1686 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1687 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1688 unsigned int outputChannels = inputChannels * depthMultiplier;
1689 unsigned int outputBatchSize = inputBatchSize;
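    // Worked through with the descriptor values set further below (padTop = padBottom = 1, strideY = 1,
    // no horizontal padding, strideX = 2): outputHeight = 8 - 5 + 1 + 2 = 6, outputWidth = (16 - 3 + 1) / 2 = 7.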
1690
1691 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1692 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1693 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1694 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1695 armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1696 ArmnnType);
1697 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1698
1699 // Set quantization parameters if the requested type is a quantized type.
1700 if(armnn::IsQuantizedType<T>())
1701 {
1702 inputTensorInfo.SetQuantizationScale(qScale);
1703 inputTensorInfo.SetQuantizationOffset(qOffset);
1704 outputTensorInfo.SetQuantizationScale(qScale);
1705 outputTensorInfo.SetQuantizationOffset(qOffset);
1706 kernelDesc.SetQuantizationScale(qScale);
1707 kernelDesc.SetQuantizationOffset(qOffset);
1708 biasDesc.SetQuantizationScale(qScale*qScale);
1709 biasDesc.SetQuantizationOffset(0);
1710 }
1711
1712 // NOTE: originalInputData is in NCHW format
1713 std::vector<T> originalInputData = std::vector<T>(
1714         QuantizedVector<T>({
1715 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1716 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1717 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1718 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1719 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1720 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1721 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1722 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1723 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1724 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1725 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1726 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1727 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1728 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1729 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1730 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1731 },
1732 inputTensorInfo.GetQuantizationScale(),
1733 inputTensorInfo.GetQuantizationOffset()));
1734
1735     std::vector<T> inputData = originalInputData;
1736     // Permute the input data to NHWC at this point, if that layout was requested.
1737 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1738 if (layout == armnn::DataLayout::NHWC)
1739 {
1740 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1741 originalInputData.data(), inputData.data(), sizeof(T));
1742 }
1743 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1744
1745     std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1746                                                biasDesc.GetQuantizationScale(),
1747                                                biasDesc.GetQuantizationOffset());
1748
1749     auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1750
1751 std::vector<T> kernelData = std::vector<T>(
1752         QuantizedVector<T>({
1753             1, 1, 1,
1754             1, -1, 1,
1755             1, 1, 1,
1756             1, 1, 1,
1757             1, 1, 1,
1758
1759             2, 2, 2,
1760             2, 2, 2,
1761             2, 2, 2,
1762             2, 2, 2,
1763             2, 2, 2,
1764
1765             0, 0, 0,
1766             0, -1, 0,
1767             0, 0, 0,
1768             0, 0, 0,
1769             0, 0, 0,
1770
1771             0, 0, 0,
1772             0, 0, 0,
1773             0, 1, 0,
1774             0, 0, 0,
1775             0, 0, 0
1776         },
1777         kernelDesc.GetQuantizationScale(),
1778         kernelDesc.GetQuantizationOffset()));
1779
1780     auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1781
1782 // Manually calculated.
1783 std::vector<T> originalOutputImage = std::vector<T>(
1784         QuantizedVector<T>({
1785             3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1786             6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1787             5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1788             6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1789             6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1790             5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1791
1792            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1793             0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,
1794            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1795            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1796            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1797            -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1798
1799             8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1800            10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1801            10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1802            10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1803            10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1804             8.0f,  8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1805
1806             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1811             0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1812         },
1813         outputTensorInfo.GetQuantizationScale(),
1814         outputTensorInfo.GetQuantizationOffset()));
1815
1816 // Optionally apply bias to output image.
1817 if(biasEnabled)
1818 {
1819 ApplyBias(originalOutputImage,
1820 outputTensorInfo.GetQuantizationScale(),
1821 outputTensorInfo.GetQuantizationOffset(),
1822 biasV,
1823 biasDesc.GetQuantizationScale(),
1824 biasDesc.GetQuantizationOffset(),
1825 outputWidth,
1826 outputHeight);
1827 }
1828
1829 LayerTestResult<T, 4> ret(outputTensorInfo);
1830 std::vector<T> outputImage = originalOutputImage;
1831 if (layout == armnn::DataLayout::NHWC)
1832 {
1833 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1834 originalOutputImage.data(), outputImage.data(), sizeof(T));
1835 }
1836
1837 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1838
1839 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1840 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1841
1842 armnn::DepthwiseConvolution2dQueueDescriptor data;
1843 armnn::WorkloadInfo info;
1844 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1845 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1846
1847 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1848 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1849
1850 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1851 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1852
1853 data.m_Weight = &weightsTensor;
1854 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1855 data.m_Parameters.m_StrideX = 2;
1856 data.m_Parameters.m_StrideY = 1;
1857 data.m_Parameters.m_PadLeft = 0;
1858 data.m_Parameters.m_PadRight = 0;
1859 data.m_Parameters.m_PadTop = 1;
1860 data.m_Parameters.m_PadBottom = 1;
1861 data.m_Parameters.m_BiasEnabled = biasEnabled;
1862 data.m_Parameters.m_DataLayout = layout;
1863
1864 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1865 inputHandle->Allocate();
1866 outputHandle->Allocate();
1867
1868 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1869
1870 ExecuteWorkload(*workload, memoryManager);
1871
1872 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1873
1874 return ret;
1875}
1876
1877template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1878 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1879LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1880 armnn::IWorkloadFactory& workloadFactory,
1881 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1882 const boost::multi_array<T, 4>& originalInput,
1883 const boost::multi_array<T, 4>& originalKernel,
1884 const boost::multi_array<B, 1>& bias,
1885 const boost::multi_array<T, 4>& originalOutputExpected,
1886 float qScale,
1887 int32_t qOffset,
1888 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1889 uint32_t padLeft = 0,
1890 uint32_t padTop = 0,
1891 uint32_t padRight = 0,
1892 uint32_t padBottom = 0,
1893 uint32_t strideX = 1,
1894 uint32_t strideY = 1,
1895 uint32_t dilationX = 1,
1896 uint32_t dilationY = 1)
1897{
1898 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1899 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1900 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1901 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1902
1903 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1904 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1905 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1906 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1907
1908 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1909 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1910 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1911 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1912
1913 bool biasEnabled = bias.size() > 0;
1914
1915 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1916 BOOST_ASSERT(inputNum == 1);
1917 BOOST_ASSERT(outputNum == 1);
1918
1919 // If a bias is used, its size must equal the number of output channels.
1920 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1921
1922
1923 // Note these tensors will use two (identical) batches.
1924 armnn::TensorInfo inputTensorInfo =
1925 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1926 armnn::TensorInfo outputTensorInfo =
1927 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1928
1929     // For depthwise convolution the kernel is always in NCHW layout, regardless of the layout of the input and output.
1930 armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1931
1932 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1933
1934 // Set quantization parameters if the requested type is a quantized type.
1935 if(armnn::IsQuantizedType<T>())
1936 {
1937 inputTensorInfo.SetQuantizationScale(qScale);
1938 inputTensorInfo.SetQuantizationOffset(qOffset);
1939 outputTensorInfo.SetQuantizationScale(qScale);
1940 outputTensorInfo.SetQuantizationOffset(qOffset);
1941 kernelDesc.SetQuantizationScale(qScale);
1942 kernelDesc.SetQuantizationOffset(qOffset);
1943 biasDesc.SetQuantizationScale(qScale*qScale);
1944 biasDesc.SetQuantizationOffset(0);
1945 }
1946
1947 LayerTestResult<T, 4> ret(outputTensorInfo);
1948
1949 // Construct input data
1950 std::vector<T> input;
1951 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1952 std::vector<T> inputData;
1953 inputData.insert(inputData.end(), input.begin(), input.end());
1954 inputData.insert(inputData.end(), input.begin(), input.end());
1955
1956     // Permute the input data to NHWC at this point, if that layout was requested.
1957 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1958 if (layout == armnn::DataLayout::NHWC)
1959 {
1960 std::vector<T> tmp(inputData.size());
1961 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1962 inputData = tmp;
1963 }
1964
1965 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1966
1967 std::vector<T> output;
1968 output.assign(originalOutputExpected.data(),
1969 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1970
1971 // Apply bias to output data if it is enabled.
1972 if(biasEnabled)
1973 {
1974 std::vector<T> biasV;
1975 biasV.assign(bias.data(), bias.data() + outputChannels);
1976 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1977 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1978 outputWidth, outputHeight);
1979 }
1980
1981 // Construct expected output data
1982 std::vector<T> outputData;
1983 outputData.insert(outputData.end(), output.begin(), output.end());
1984 outputData.insert(outputData.end(), output.begin(), output.end());
1985
1986     // Permute the expected output to NHWC at this point, if that layout was requested.
1987 if (layout == armnn::DataLayout::NHWC)
1988 {
1989 std::vector<T> tmp(outputData.size());
1990 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1991 outputData = tmp;
1992 }
1993 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1994
1995 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1996 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1997
1998 armnn::DepthwiseConvolution2dQueueDescriptor data;
1999 armnn::WorkloadInfo info;
2000 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2001 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2002
2003 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2004 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2005
2006 if(biasEnabled)
2007 {
2008 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2009 }
2010
2011 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2012 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2013
2014 data.m_Weight = &weightsTensor;
2015 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
2016 data.m_Parameters.m_StrideX = strideX;
2017 data.m_Parameters.m_StrideY = strideY;
2018 data.m_Parameters.m_PadLeft = padLeft;
2019 data.m_Parameters.m_PadRight = padRight;
2020 data.m_Parameters.m_PadTop = padTop;
2021 data.m_Parameters.m_PadBottom = padBottom;
2022 data.m_Parameters.m_BiasEnabled = biasEnabled;
2023 data.m_Parameters.m_DataLayout = layout;
2024 data.m_Parameters.m_DilationX = dilationX;
2025 data.m_Parameters.m_DilationY = dilationY;
2026
2027 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2028 inputHandle->Allocate();
2029 outputHandle->Allocate();
2030
2031 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2032
2033 ExecuteWorkload(*workload, memoryManager);
2034
2035 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2036
2037 return ret;
2038}
2039
2040template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2041 typename T = armnn::ResolveType<ArmnnType>>
2042LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2043 armnn::IWorkloadFactory& workloadFactory,
2044 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2045 float qScale,
2046 int32_t qOffset,
2047 bool biasEnabled,
2048 const armnn::DataLayout layout)
2049{
2050 // Use a single-batch 2-channel 5x5 image as input.
2051 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2052 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2053         QuantizedVector<T>({
2054              0,  1,  2,  3,  4,
2055 5, 6, 7, 8, 9,
2056 10, 11, 12, 13, 14,
2057 15, 16, 17, 18, 19,
2058 20, 21, 22, 23, 24,
2059
2060 25, 26, 27, 28, 29,
2061 30, 31, 32, 33, 34,
2062 35, 36, 37, 38, 39,
2063 40, 41, 42, 43, 44,
2064 45, 46, 47, 48, 49
2065         },
2066         inputTensorInfo.GetQuantizationScale(),
2067         inputTensorInfo.GetQuantizationOffset())));
2068
2069 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2070 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2071 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2072         QuantizedVector<T>({
2073             32, 31, 30, 29,
2074 28, 27, 26, 25,
2075 24, 23, 22, 21,
2076 20, 19, 18, 17,
2077
2078 16, 15, 14, 13,
2079 12, 11, 10, 9,
2080 8, 7, 6, 5,
2081 4, 3, 2, 1
2082         },
2083         kernelTensorInfo.GetQuantizationScale(),
2084         kernelTensorInfo.GetQuantizationOffset())));
2085
2086 // Expected output is 1 batch of a 2-channel 5x5 image.
2087     // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2088 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2089 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2090         QuantizedVector<T>({
2091             1062, 1580, 1850, 1530, 1117,
2092 2140, 3108, 3500, 2842, 2042,
2093 3580, 5068, 5460, 4342, 3062,
2094 3618, 5072, 5390, 4248, 2971,
2095 3074, 4282, 4510, 3533, 2457,
2096
2097 1550, 2284, 2362, 1955, 1428,
2098 2910, 4206, 4342, 3528, 2536,
2099 3390, 4886, 5022, 4068, 2916,
2100 3566, 5056, 5182, 4133, 2922,
2101 3100, 4352, 4452, 3517, 2465
2102         },
2103         outputTensorInfo.GetQuantizationScale(),
2104         outputTensorInfo.GetQuantizationOffset())));
2105
2106 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2107 workloadFactory,
2108 memoryManager,
2109 input,
2110 kernel,
2111 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2112 expectedOutput,
2113 qScale,
2114 qOffset,
2115 layout,
2116 1, // Padding left.
2117 1, // Padding top.
2118 2, // Padding right.
2119 2, // Padding bottom.
2120 1, // strideX
2121 1); // strideY
2122}
2123
2124template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2125 typename T = armnn::ResolveType<ArmnnType>>
2126LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2127 armnn::IWorkloadFactory& workloadFactory,
2128 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2129 float qScale,
2130 int32_t qOffset,
2131 bool biasEnabled)
2132{
2133 auto layout = armnn::DataLayout::NHWC;
2134
2135 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2136 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2137         QuantizedVector<T>({
2138              0,  1,  2,  3,  4,
2139 5, 6, 7, 8, 9,
2140 10, 11, 12, 13, 14,
2141 15, 16, 17, 18, 19,
2142 20, 21, 22, 23, 24,
2143
2144 25, 26, 27, 28, 29,
2145 30, 31, 32, 33, 34,
2146 35, 36, 37, 38, 39,
2147 40, 41, 42, 43, 44,
2148 45, 46, 47, 48, 49
2149         },
2150         inputTensorInfo.GetQuantizationScale(),
2151         inputTensorInfo.GetQuantizationOffset())));
2152
2153 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2154 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2155         QuantizedVector<T>({
2156             32, 31, 30, 29,
2157 28, 27, 26, 25,
2158 24, 23, 22, 21,
2159 20, 19, 18, 17,
2160
2161 16, 15, 14, 13,
2162 12, 11, 10, 9,
2163 8, 7, 6, 5,
2164 4, 3, 2, 1
2165         },
2166         kernelTensorInfo.GetQuantizationScale(),
2167         kernelTensorInfo.GetQuantizationOffset())));
2168
2169 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2170 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2171         QuantizedVector<T>({
2172             1062, 1580, 1850, 1530, 1117,
2173 2140, 3108, 3500, 2842, 2042,
2174 3580, 5068, 5460, 4342, 3062,
2175 3618, 5072, 5390, 4248, 2971,
2176 3074, 4282, 4510, 3533, 2457,
2177
2178 1550, 2284, 2362, 1955, 1428,
2179 2910, 4206, 4342, 3528, 2536,
2180 3390, 4886, 5022, 4068, 2916,
2181 3566, 5056, 5182, 4133, 2922,
2182 3100, 4352, 4452, 3517, 2465
2183         },
2184         outputTensorInfo.GetQuantizationScale(),
2185         outputTensorInfo.GetQuantizationOffset())));
2186
2187 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2188 workloadFactory,
2189 memoryManager,
2190 input,
2191 kernel,
2192 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2193 expectedOutput,
2194 qScale,
2195 qOffset,
2196 layout,
2197 1, // Padding left.
2198 1, // Padding top.
2199 2, // Padding right.
2200 2, // Padding bottom.
2201 1, // strideX
2202 1); // strideY
2203}
2204
2205template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2206 typename T = armnn::ResolveType<ArmnnType>>
2207LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2208 armnn::IWorkloadFactory& workloadFactory,
2209 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2210 float qScale,
2211 int32_t qOffset,
2212 bool biasEnabled)
2213{
2214 auto layout = armnn::DataLayout::NHWC;
2215
2216 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2217 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2218         QuantizedVector<T>({
2219 0, 0, 0, 0, 0, 0, 0, 0, 0,
2220 0, 0, 0, 0, 0, 0, 0, 0, 0,
2221 0, 0, 0, 0, 0, 0, 0, 0, 0,
2222 0, 0, 0, 1, 1, 1, 0, 0, 0,
2223 0, 0, 0, 1, 1, 1, 0, 0, 0,
2224 0, 0, 0, 1, 1, 1, 0, 0, 0,
2225 0, 0, 0, 0, 0, 0, 0, 0, 0,
2226 0, 0, 0, 0, 0, 0, 0, 0, 0,
2227 0, 0, 0, 0, 0, 0, 0, 0, 0
2228 },
2229 inputTensorInfo.GetQuantizationScale(),
2230 inputTensorInfo.GetQuantizationOffset())));
2231
2232 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2233 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2234         QuantizedVector<T>({
2235 1, 2, 3,
2236 4, 5, 6,
2237 7, 8, 9
2238 },
2239 kernelTensorInfo.GetQuantizationScale(),
2240 kernelTensorInfo.GetQuantizationOffset())));
2241
2242 uint32_t padLeft = 0;
2243 uint32_t padTop = 0;
2244 uint32_t padRight = 0;
2245 uint32_t padBottom = 0;
2246 uint32_t strideX = 1;
2247 uint32_t strideY = 1;
2248 uint32_t dilationX = 3;
2249 uint32_t dilationY = 3;
2250
2251 // Since the dilation rate is 3 this will reduce the size of the output from 9x9 to 3x3 of all 5s.
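    // Effective kernel size with dilation 3: (3 - 1) * 3 + 1 = 7, giving (9 - 7) / 1 + 1 = 3 output positions per side.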
2252 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2253 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2254         QuantizedVector<T>({
2255 5, 5, 5,
2256 5, 5, 5,
2257 5, 5, 5
2258 },
2259 outputTensorInfo.GetQuantizationScale(),
2260 outputTensorInfo.GetQuantizationOffset())));
2261
2262 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2263 workloadFactory,
2264 memoryManager,
2265 input,
2266 kernel,
2267 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2268 expectedOutput,
2269 qScale,
2270 qOffset,
2271 layout,
2272 padLeft,
2273 padTop,
2274 padRight,
2275 padBottom,
2276 strideX,
2277 strideY,
2278 dilationX,
2279 dilationY);
2280}
2281
2282template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2283LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2284 armnn::IWorkloadFactory& workloadFactory,
2285 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2286 const std::vector<float>& inputNoQuantizedValues,
2287 armnn::TensorInfo& inputTensorInfo,
2288 const std::vector<float>& kernelNoQuantizedValues,
2289 armnn::TensorInfo& kernelTensorInfo,
2290 const std::vector<float>& outputExpectedNoQuantizedValues,
2291 armnn::TensorInfo& outputTensorInfo,
2292 uint32_t dilationX,
2293 uint32_t dilationY,
2294 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2295 bool biasEnabled = false)
2296{
2297 float qScale;
2298 int32_t qOffset;
2299 switch (ArmnnType)
2300 {
2301 case armnn::DataType::QuantisedAsymm8:
2302 {
2303 qScale = 0.1f;
2304 qOffset = 128;
2305 break;
2306 }
2307 case armnn::DataType::QuantisedSymm16:
2308 {
2309 qScale = 0.1f;
2310 qOffset = 0;
2311 break;
2312 }
2313 case armnn::DataType::Float32:
2314 default:
2315 {
2316 qScale = 0.f;
2317 qOffset = 0;
2318 break;
2319 }
2320 }
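    // Note: for Float32 the 0.0f scale is effectively unused, since QuantizedVector only quantizes
    // when T is a quantized type and otherwise passes the float values through unchanged.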
2321
2322 inputTensorInfo.SetQuantizationScale(qScale);
2323 inputTensorInfo.SetQuantizationOffset(qOffset);
2324 kernelTensorInfo.SetQuantizationScale(qScale);
2325 kernelTensorInfo.SetQuantizationOffset(qOffset);
2326 outputTensorInfo.SetQuantizationScale(qScale);
2327 outputTensorInfo.SetQuantizationOffset(qOffset);
2328
2329 auto input = MakeTensor<T, 4>(inputTensorInfo,
2330                                   std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2331 inputTensorInfo.GetQuantizationScale(),
2332 inputTensorInfo.GetQuantizationOffset())));
2333     auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2334                                    std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2335 kernelTensorInfo.GetQuantizationScale(),
2336 kernelTensorInfo.GetQuantizationOffset())));
2337 auto expectedOutput =
2338 MakeTensor<T, 4>(outputTensorInfo,
2339 std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2340 outputTensorInfo.GetQuantizationScale(),
2341 outputTensorInfo.GetQuantizationOffset())));
2342
2343 uint32_t padLeft = 0;
2344 uint32_t padTop = 0;
2345 uint32_t padRight = 0;
2346 uint32_t padBottom = 0;
2347 uint32_t strideX = 1;
2348 uint32_t strideY = 1;
2349
2350 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2351 workloadFactory,
2352 memoryManager,
2353 input,
2354 kernel,
2355 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2356 expectedOutput,
2357 qScale,
2358 qOffset,
2359 layout,
2360 padLeft,
2361 padTop,
2362 padRight,
2363 padBottom,
2364 strideX,
2365 strideY,
2366 dilationX,
2367 dilationY);
2368}
2369
2370template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2371LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2372 armnn::IWorkloadFactory& workloadFactory,
2373 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2374 bool biasEnabled,
2375 const armnn::DataLayout layout)
2376{
2377 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2378 std::vector<float> inputNoQuantizedValues =
2379 {
2380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2383 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2384 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2385 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2390 };
2391
2392 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2393 std::vector<float> kernelNoQuantizedValues =
2394 {
2395 1, 2, 3,
2396 4, 5, 6,
2397 7, 8, 9
2398 };
2399
2400     // Since the dilation rate is 3, the kernel is effectively dilated to 7x7 ((3 - 1) * 3 + 1 = 7),
2401     // so the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2402 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2403 std::vector<float> outputExpectedNoQuantizedValues =
2404 {
2405 6., 5., 5., 5.,
2406 6., 5., 5., 5.,
2407 6., 5., 5., 5.,
2408 3., 2., 2., 2.
2409 };
2410
2411 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2412 workloadFactory,
2413 memoryManager,
2414 inputNoQuantizedValues,
2415 inputTensorInfo,
2416 kernelNoQuantizedValues,
2417 kernelTensorInfo,
2418 outputExpectedNoQuantizedValues,
2419 outputTensorInfo,
2420 3,
2421 3,
2422 layout,
2423 biasEnabled);
2424}
2425
2426template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2427LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2428 armnn::IWorkloadFactory& workloadFactory,
2429 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2430 bool biasEnabled,
2431 const armnn::DataLayout layout)
2432{
2433 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2434 std::vector<float> inputNoQuantizedValues =
2435 {
2436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2439 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2440 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2441 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2446
2447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2450 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2451 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2452 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2457 };
2458
2459 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2460 std::vector<float> kernelNoQuantizedValues =
2461 {
2462 1, 2, 3,
2463 4, 5, 6,
2464 7, 8, 9,
2465
2466 1, 2, 3,
2467 4, 5, 6,
2468 7, 8, 9
2469 };
2470
2471     // Since the dilation rate is 3, the kernel is effectively dilated to 7x7 ((3 - 1) * 3 + 1 = 7),
2472     // so the output will be 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2473 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2474 std::vector<float> outputExpectedNoQuantizedValues =
2475 {
2476 6., 5., 5., 5.,
2477 6., 5., 5., 5.,
2478 6., 5., 5., 5.,
2479 3., 2., 2., 2.,
2480
2481 6., 5., 5., 5.,
2482 6., 5., 5., 5.,
2483 6., 5., 5., 5.,
2484 3., 2., 2., 2.
2485 };
2486
2487 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2488 workloadFactory,
2489 memoryManager,
2490 inputNoQuantizedValues,
2491 inputTensorInfo,
2492 kernelNoQuantizedValues,
2493 kernelTensorInfo,
2494 outputExpectedNoQuantizedValues,
2495 outputTensorInfo,
2496 3,
2497 3,
2498 layout,
2499 biasEnabled);
2500}
2501
2502template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2503LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2504 armnn::IWorkloadFactory& workloadFactory,
2505 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2506 bool biasEnabled,
2507 const armnn::DataLayout layout)
2508{
2509 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2510 std::vector<float> inputNoQuantizedValues =
2511 {
2512 10.0, 10.0, 10.0,
2513 10.0, 10.0, 10.0,
2514 10.0, 10.0, 10.0,
2515
2516 21.0, 22.0, 23.0,
2517 24.0, 25.0, 26.0,
2518 27.0, 28.0, 29.0
2519 };
2520
2521 armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2522
2523 std::vector<float> kernelNoQuantizedValues =
2524 {
2525 0.25f, 0.25f,
2526 0.25f, 0.25f,
2527
2528 0.25f, 0.25f,
2529 0.25f, 0.25f,
2530
2531 0.0f , 0.0f,
2532 0.0f , 0.1f,
2533
2534 0.0f , 0.0f,
2535 0.0f , 0.1f,
2536
2537 0.2f , 0.0f,
2538 0.0f , 0.0f,
2539
2540 0.2f , 0.0f,
2541 0.0f , 0.0f,
2542
2543 0.0f , 0.3f,
2544 0.0f , 0.0f,
2545
2546 0.0f , 0.3f,
2547 0.0f , 0.0f
2548 };
2549
2550 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2551 std::vector<float> outputExpectedNoQuantizedValues =
2552 {
2553 10.f, 10.f,
2554 10.f, 10.f,
2555
2556 1.f, 1.f,
2557 1.f, 1.f,
2558
2559 2.f, 2.f,
2560 2.f, 2.f,
2561
2562 3.f, 3.f,
2563 3.f, 3.f,
2564
2565 23.f, 24.f,
2566 26.f, 27.f,
2567
2568 2.5f, 2.6000001f,
2569 2.8f, 2.9f,
2570
2571 4.2000003f, 4.4f,
2572 4.8f, 5.f,
2573
2574 6.6000004f, 6.9f,
2575 7.5000005f, 7.8f
2576 };
2577
2578
2579 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2580 workloadFactory,
2581 memoryManager,
2582 inputNoQuantizedValues,
2583 inputTensorInfo,
2584 kernelNoQuantizedValues,
2585 kernelTensorInfo,
2586 outputExpectedNoQuantizedValues,
2587 outputTensorInfo,
2588 1,
2589 1,
2590 layout,
2591 biasEnabled);
2592}
2593
2594template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2595LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2596 armnn::IWorkloadFactory& workloadFactory,
2597 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2598 bool biasEnabled,
2599 const armnn::DataLayout layout)
2600{
2601 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2602 std::vector<float> inputNoQuantizedValues =
2603 {
2604 10.0, 10.0, 10.0,
2605 10.0, 10.0, 10.0,
2606 10.0, 10.0, 10.0,
2607
2608 21.0, 22.0, 23.0,
2609 24.0, 25.0, 26.0,
2610 27.0, 28.0, 29.0
2611 };
2612
2613 armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2614
2615 std::vector<float> kernelNoQuantizedValues =
2616 {
2617 0.25f, 0.25f,
2618 0.25f, 0.25f,
2619
2620 0.2f , 0.0f,
2621 0.0f , 0.0f,
2622
2623 0.0f , 0.0f,
2624 0.0f , 0.1f,
2625
2626 0.0f , 0.3f,
2627 0.0f , 0.0f
2628
2629 };
2630
2631 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2632 std::vector<float> outputExpectedNoQuantizedValues =
2633 {
2634 10.f, 10.f,
2635 10.f, 10.f,
2636
2637 1.f, 1.f,
2638 1.f, 1.f,
2639
2640 4.2000003f, 4.4f,
2641 4.8f, 5.f,
2642
2643 6.6000004f, 6.9f,
2644 7.5000005f, 7.8f
2645 };
2646
2647
2648 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2649 workloadFactory,
2650 memoryManager,
2651 inputNoQuantizedValues,
2652 inputTensorInfo,
2653 kernelNoQuantizedValues,
2654 kernelTensorInfo,
2655 outputExpectedNoQuantizedValues,
2656 outputTensorInfo,
2657 1,
2658 1,
2659 layout,
2660 biasEnabled);
2661}
2662
2663template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2664LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2665 armnn::IWorkloadFactory& workloadFactory,
2666 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2667 armnn::IWorkloadFactory& refWorkloadFactory,
2668 const armnnUtils::DataLayoutIndexed& layout)
2669{
2670 unsigned int inputHeight = 8;
2671 unsigned int inputWidth = 16;
2672 unsigned int inputChannels = 3;
2673 unsigned int inputNum = 5;
2674
2675 unsigned int kernelHeight = 3;
2676 unsigned int kernelWidth = 3;
2677 unsigned int channelMultiplier = 1;
2678
2679 unsigned int strideX = 2;
2680 unsigned int strideY = 3;
2681 unsigned int padX = 1;
2682 unsigned int padY = 1;
2683
2684 unsigned int outputNum = inputNum;
2685 unsigned int outputChannels = inputChannels * channelMultiplier;
2686 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2687 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
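    // With integer (floor) division these give outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and
    // outputWidth = (16 + 2 - 3 + 2) / 2 = 8.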
2688
2689 armnn::TensorInfo inputTensorInfo;
2690 armnn::TensorInfo outputTensorInfo;
2691 armnn::TensorInfo kernelDesc;
2692 armnn::TensorInfo biasDesc;
2693
2694
2695 std::vector<unsigned int> inputShape;
2696 std::vector<unsigned int> outputShape;
2697 std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2698 std::vector<unsigned int> biasShape{ outputChannels };
2699 switch (layout.GetDataLayout())
2700 {
2701 case armnn::DataLayout::NCHW:
2702 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2703 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2704 break;
2705 case armnn::DataLayout ::NHWC:
2706 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2707 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2708 break;
2709 default:
2710 throw armnn::InvalidArgumentException("unknown data layout ["
2711 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2712 }
2713
2714 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2715 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2716 int32_t qOffset = 0;
2717
2718 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2719 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2720 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2721 biasDesc = armnn::TensorInfo(
2722 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2723
2724 LayerTestResult<T, 4> ret(outputTensorInfo);
2725
2726 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2727 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2728 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2729 biasDesc, 1028, 0.0f, 255.0f);
2730
2731 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2732 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2733
2734 armnn::DepthwiseConvolution2dQueueDescriptor data;
2735 armnn::WorkloadInfo info;
2736 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2737 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2738
2739 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2740 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2741
2742 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2743 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2744 data.m_Weight = &weightsTensor;
2745 data.m_Bias = &biasTensor;
2746 data.m_Parameters.m_StrideX = strideX;
2747 data.m_Parameters.m_StrideY = strideY;
2748 data.m_Parameters.m_PadLeft = padX;
2749 data.m_Parameters.m_PadRight = padX;
2750 data.m_Parameters.m_PadTop = padY;
2751 data.m_Parameters.m_PadBottom = padY;
2752 data.m_Parameters.m_BiasEnabled = true;
2753 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2754
2755 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2756 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2757
2758 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2759 armnn::WorkloadInfo refInfo = info;
2760 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2761 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2762
2763 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2764 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2765
2766 outputHandleRef->Allocate();
2767 inputHandleRef->Allocate();
2768
2769 inputHandle->Allocate();
2770 outputHandle->Allocate();
2771
2772 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2773 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2774
2775 ExecuteWorkload(*workload, memoryManager);
2776
2777 workloadRef->PostAllocationConfigure();
2778 workloadRef->Execute();
2779
2780 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2781 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2782
2783 return ret;
2784}
2785
2786//
2787// Explicit template specializations
2788//
2789
2790template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2791Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2792 armnn::IWorkloadFactory&,
2793 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2794 bool,
2795 armnn::DataLayout);
2796
2797template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
2798Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
2799 armnn::IWorkloadFactory&,
2800 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2801 bool,
2802 armnn::DataLayout);
2803
2804template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
2805Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
2806 armnn::IWorkloadFactory&,
2807 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2808 bool,
2809 armnn::DataLayout);
2810
2811template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

//
// Implementation functions
//

LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

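// The quantised variants below pass an explicit scale/offset pair to the common implementation
// (e.g. 0.5f and 50), i.e. real value = scale * (quantised value - offset) under the usual
// asymmetric quantisation scheme.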
LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        armnn::DataLayout::NHWC);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
        <armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution1dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
}

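// Per-axis (per-channel) quantisation test: the 8-bit weights carry one symmetric scale per
// output channel (quantisation dimension 0), while the input and output tensors each use a
// single asymmetric scale and offset.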
LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType  = DataType::QuantisedAsymm8;
    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
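    // Each bias scale is the input scale multiplied by the matching per-channel weight scale
    // (0.5 * {0.5, 0.75, 1.0} = {0.25, 0.375, 0.5}), so the 32-bit biases dequantise consistently
    // with the products of inputs and weights.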

    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };
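    // Illustrative derivation for one output pixel, using the data above: the dequantised inputs
    // are 5.0 and -10.0; for output channel 0 the weights dequantise to 0.5 and 1.0 and the bias
    // to 1.0, giving 5.0 * 0.5 - 10.0 * 1.0 + 1.0 = -6.5, which requantises (scale 1.0,
    // offset 128) to roughly 121.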

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(kernelInfo, kernelData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);
    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

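// The Compare* tests run the same layer through the factory under test and through
// refWorkloadFactory; the returned LayerTestResult carries both outputs so they can be
// checked against each other.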
LayerTestResult<float,4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });

    std::vector<float> kernelData;
    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
    for (unsigned int i = 0; i < 64; ++i)
    {
        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
    }
    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);

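    // Every output element is expected to be zero: each 2x2 filter is { 1, -1, -1, 1 }, so the
    // single valid position gives 1*1 - 1*2 - 1*3 + 1*4 = 0 for each of the 64 output channels.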
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<float, 1>(),
        expectedOutput,
        0.f,
        0,
        armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        false);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

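// Per-axis quantisation test for depthwise convolution: the weights effectively carry one
// symmetric scale per output channel (input channels * depth multiplier = 4 here), while input
// and output use a single asymmetric scale and offset.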
LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType  = DataType::QuantisedAsymm8;
    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
    TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C

    const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
    const unsigned int quantDimension = 0;
    TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W

    const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
    constexpr unsigned int biasQuantDimension = 0;
    TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);

    std::vector<uint8_t> inputData =
    {
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130
    };

    std::vector<int8_t> kernelData =
    {
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4, 4
    };

    std::vector<uint8_t> expectedOutputData =
    {
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131
    };
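    // Illustrative check for the first output channel: the dequantised channel-0 inputs are all
    // 0.5, so a 2x2 window sums to 2.0 with unit dequantised weights; adding the dequantised bias
    // of 2.0 gives 4.0, which requantises (scale 1.0, offset 128) to the 132 seen above.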

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_DilationX = 1;
    descriptor.m_DilationY = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);

    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}