//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <QuantizeHelper.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/Permute.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>

#include <backendsCommon/test/DataLayoutUtils.hpp>
#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <boost/numeric/conversion/cast.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

// 4-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias4({1, 2, 3, 4});

// 8-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 depending on the number of output channels
// (falling back to Bias2 for any other channel count), or an empty vector when bias is disabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};
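
// For illustration, the mapping above means the following would hold at compile time
// (a sketch only; these asserts are not part of the test suite and would need <type_traits>):
//
//     static_assert(std::is_same<FullyConnectedBiasTypeForInputType<float>::Type, float>::value, "");
//     static_assert(std::is_same<FullyConnectedBiasTypeForInputType<uint8_t>::Type, int32_t>::value, "");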

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
               const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                BOOST_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}
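
// In effect, for each channel c and each pixel (x, y) of a CHW image, ApplyBias computes
// (a sketch of the loop above, in terms of this file's helpers):
//
//     v[(c * h + y) * w + x] = SelectiveQuantize<T>(
//         SelectiveDequantize(v[(c * h + y) * w + x], vScale, vOffset)
//         + SelectiveDequantize(bias[c], bScale, bOffset),
//         vScale, vOffset);
//
// For non-quantized types the quantize/dequantize steps pass values through unchanged, so this
// reduces to a plain per-channel addition.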

//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const boost::multi_array<T, 4>& originalInput,
    const boost::multi_array<T, 4>& originalKernel,
    const boost::multi_array<B, 1>& bias,
    const boost::multi_array<T, 4>& originalOutputExpected,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    armnn::IgnoreUnused(memoryManager);

    unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
    unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
    unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);

    unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
    unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
    unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);

    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    BOOST_ASSERT(inputNum == 1);
    BOOST_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
        armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }
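    // Note on the bias scale above: quantized convolution implementations generally require
    // biasScale == inputScale * weightScale, so that the bias can be added directly to the
    // int32 accumulator. Input and weights both use qScale here, hence qScale * qScale.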

    LayerTestResult<T, 4> ret(outputTensorInfo);

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // At this point, permute the input data if required.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    // Construct expected output data - two identical images.
    std::vector<T> outputData;
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
    outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());

    // At this point, permute the expected output if required.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
        outputData = tmp;
    }

    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    // Permute the kernel if necessary.
    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    if(biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const boost::multi_array<T, 4>& input,
    const boost::multi_array<T, 4>& kernel,
    const boost::multi_array<B, 1>& bias,
    const boost::multi_array<T, 4>& outputExpected,
    const armnn::DataLayout dataLayout,
    float qScale,
    int32_t qOffset,
    uint32_t padLeft = 1,
    uint32_t padTop = 1,
    uint32_t padRight = 1,
    uint32_t padBottom = 1,
    uint32_t strideX = 1,
    uint32_t strideY = 1)
{
    armnn::IgnoreUnused(qScale, qOffset);

    unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
    unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
    unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
    unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);

    unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);

    unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
    unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
    unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels}, ArmnnType);
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
    auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);

    // Construct the expected output data. Note that, unlike SimpleConvolution2dTestImpl, this
    // implementation does not apply the bias to the expected output (and never populates the
    // bias tensor), so callers should pass expected data that already accounts for any bias.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    LayerTestResult<T, 4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);

    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    armnn::Convolution2dQueueDescriptor data;

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;

    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // This shouldn't be particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point dimensionality doesn't matter.

    unsigned int batchSize = 1;
    unsigned int inputChannels = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize = 3;
    unsigned int padSize = 2;
    unsigned int stride = 1;
    unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize) / stride + 1.
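
    // The 1D-as-2D mapping used below (a sketch of this function's conventions): a [N, C, L]
    // 1D tensor becomes a [N, C, L, 1] NCHW tensor, the [O, I, K] kernel becomes [O, I, K, 1],
    // and the padding and stride apply to the 'height' axis (padTop/padBottom = padSize,
    // strideY = stride), while the trailing 'width' axis keeps size 1, zero padding and stride 1.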

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f - 3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
                  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
                  1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
    armnn::ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    // Output
    LayerTestResult<T,4> ret(outputInfo);
    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
    return ret;
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Use a single-batch 1-channel 4x3 image as input (NHWC).
    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    });

    // Use 1 batch of a 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is 1 batch of a 1-channel 4x3 image.
    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);

    const std::vector<float> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1-channel, 5x5 image.
    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    });

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    });

    // Expected output is a single-batch, 1-channel, 3x3 image.
    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);

    const std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX = 2;
    uint32_t strideY = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,
            12,22,32,
            13,23,33
        },
        qScale, qOffset)));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,
            -12,-22,
        },
        qScale, qOffset)));

// Expected output is 1 batch of a 1-channel 6x8 image.
// Manually calculated like this:
//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            0, 0, 0, 0, 0, 0,
            -242, -594, -934, -372, 0, 0,
            -495, -1190, -1850, -725, 0, 0,
            -538, -1256, -1916, -748, 0, 0,
            -273, -626, -946, -363, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        2,  // Padding top.
        3,  // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>({
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        }, qScale, qOffset)));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>({
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        },
        qScale, qOffset)));

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>({
            -7140, -10580, -13940, -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144, -9318, -5152,
            -5032, -7256, -9376, -6142, -3368,
        },
        qScale, qOffset)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QAsymmU8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QSymmS16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
                                                                    inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset())));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
                                                                     kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset())));
    auto expectedOutput =
        MakeTensor<T, 4>(outputTensorInfo,
                         std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                                           outputTensorInfo.GetQuantizationScale(),
                                                           outputTensorInfo.GetQuantizationOffset())));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // A dilation rate of 3 gives an effective kernel size of d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,          // Dilation X.
        3,          // Dilation Y.
        layout,
        0, 0, 0, 0, // Explicit zero padding, so that biasEnabled binds to the bias parameter rather than padLeft.
        1, 1,       // Stride 1x1.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // A dilation rate of 3 gives an effective kernel size of d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        6., 4., 4., 4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,          // Dilation X.
        3,          // Dilation Y.
        layout,
        0, 0, 0, 0, // Explicit zero padding, so that biasEnabled binds to the bias parameter rather than padLeft.
        1, 1,       // Stride 1x1.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // A dilation rate of 2 gives an effective kernel size of d*(K-1)+1 = 2*(2-1)+1 = 3.
    // With input size I = 10, total padding P = padLeft + padRight = 2 and stride S = 3,
    // the output is 4x4: trunc((I - K_eff + P)/S) + 1 => trunc((10 - 3 + 2)/3) + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4, 7, 7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2, 3, 3, 1
    };
    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        2,
        2,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        3,
        3,
        biasEnabled);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> CompareConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int inputHeight = 8;
    unsigned int inputWidth = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX = 1;
    unsigned int padY = 1;

    unsigned int outputNum = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
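    // (I + 2P - K + S) / S is the integer form of floor((I + 2P - K) / S) + 1, so here
    // outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2 - 3 + 2) / 2 = 8.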

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
    unsigned int biasShape[] = {outputChannels};

    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
    kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
    biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);

    LayerTestResult<T,4> ret(outputTensorInfo);

    auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
    auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
    auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedCpuTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
    AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);

    armnn::Convolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    ExecuteWorkload(*workload, memoryManager);

    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}
1374
1375//
1376// DepthwiseConvolution2d implementations
1377//
1378
1379template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1380 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1381LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1382 armnn::IWorkloadFactory& workloadFactory,
1383 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1384 const boost::multi_array<T, 4>& input,
1385 const boost::multi_array<T, 4>& kernel,
1386 const boost::multi_array<B, 1>& bias,
1387 const boost::multi_array<T, 4>& outputExpected,
1388 float qScale,
1389 int32_t qOffset,
1390 const armnn::DataLayout layout,
1391 uint32_t padLeft = 0,
1392 uint32_t padTop = 0,
1393 uint32_t padRight = 0,
1394 uint32_t padBottom = 0,
1395 uint32_t strideX = 1,
1396 uint32_t strideY = 1)
1397{
1398 unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1399 unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1400 unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1401 unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1402 unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1403 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1404 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1405 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1406 unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1407 unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1408 unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1409 unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1410
1411 // If a bias is used, its size must equal the number of output channels.
1412 bool biasEnabled = bias.size() > 0;
1413 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1414
1415 // Creates the tensors.
1416 armnn::TensorInfo inputTensorInfo =
1417 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1418 armnn::TensorInfo outputTensorInfo =
1419 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1420 armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1421 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1422
1423 // Set quantization parameters if the requested type is a quantized type.
1424 if (armnn::IsQuantizedType<T>())
1425 {
1426 inputTensorInfo.SetQuantizationScale(qScale);
1427 inputTensorInfo.SetQuantizationOffset(qOffset);
1428 outputTensorInfo.SetQuantizationScale(qScale);
1429 outputTensorInfo.SetQuantizationOffset(qOffset);
1430 kernelDesc.SetQuantizationScale(qScale);
1431 kernelDesc.SetQuantizationOffset(qOffset);
1432 biasDesc.SetQuantizationScale(qScale*qScale);
1433 biasDesc.SetQuantizationOffset(0);
1434 }
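    // NOTE: biasScale = inputScale * weightScale is the usual quantization
    // convention; it keeps the int32 bias on the same scale as the int32
    // input*weight products it is accumulated with. E.g. with qScale = 0.1,
    // a real bias of 2.0 is stored as round(2.0 / (0.1 * 0.1)) = 200.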
1435
1436 // Construct the input data.
1437 std::vector<T> inputData;
1438 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1439
1440 // At this point, permute the input data if the requested layout is NHWC.
1441 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1442 if (layout == armnn::DataLayout::NHWC)
1443 {
1444 std::vector<T> tmp(inputData.size());
1445 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1446 inputData = tmp;
1447 }
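    // The PermutationVector lists the destination index of each source
    // dimension: with { 0, 3, 1, 2 } an NCHW element at (n, c, h, w) moves
    // to (n, h, w, c), i.e. C is sent to position 3, giving NHWC.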
1448
1449 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1450
1451 // Construct the output data, with bias applied, as appropriate.
1452 std::vector<T> outputData;
1453 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1454 if (biasEnabled)
1455 {
1456 std::vector<T> biasV;
1457 biasV.assign(bias.data(), bias.data() + outputChannels);
1458 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1459 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1460 outputWidth, outputHeight);
1461 }
1462
1463 LayerTestResult<T, 4> ret(outputTensorInfo);
1464
1465 // At this point, permute the expected output if the requested layout is NHWC.
1466 if (layout == armnn::DataLayout::NHWC)
1467 {
1468 std::vector<T> tmp(outputData.size());
1469 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1470 outputData = tmp;
1471 }
1472
1473 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1474
1475 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1476 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1477
1478 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1479
1480 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1481
1482 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1483 if (biasEnabled)
1484 {
1485 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1486 }
1487
1488 armnn::DepthwiseConvolution2dQueueDescriptor data;
1489 data.m_Weight = &weightsTensor;
1490 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - handling of an unused bias tensor is a known source of bugs, so exercise it deliberately.
1491 data.m_Parameters.m_StrideX = strideX;
1492 data.m_Parameters.m_StrideY = strideY;
1493 data.m_Parameters.m_PadLeft = padLeft;
1494 data.m_Parameters.m_PadRight = padRight;
1495 data.m_Parameters.m_PadTop = padTop;
1496 data.m_Parameters.m_PadBottom = padBottom;
1497 data.m_Parameters.m_BiasEnabled = biasEnabled;
1498 data.m_Parameters.m_DataLayout = layout;
1499
1500 armnn::WorkloadInfo info;
1501 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1502 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1503
1504 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1505 inputHandle->Allocate();
1506 outputHandle->Allocate();
1507
1508 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1509
1510 ExecuteWorkload(*workload, memoryManager);
1511
1512 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1513
1514 return ret;
1515}
1516
1517template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1518LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1519 armnn::IWorkloadFactory& workloadFactory,
1520 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1521 float qScale,
1522 int32_t qOffset,
1523 bool biasEnabled,
1524 const armnn::DataLayout layout)
1525{
1526 using B = armnn::ResolveType<ArmnnBType>;
1527
1528 unsigned int inputHeight = 3;
1529 unsigned int inputWidth = 3;
1530 unsigned int inputChannels = 2;
1531 unsigned int inputNum = 1;
1532
1533 unsigned int kernelHeight = 3;
1534 unsigned int kernelWidth = 3;
1535 unsigned int kernelChannels = inputChannels;
1536 unsigned int kernelDepthMultiplier = 1;
1537
1538 unsigned int outputHeight = 1;
1539 unsigned int outputWidth = 1;
1540 unsigned int outputChannels = kernelChannels;
1541 unsigned int outputNum = inputNum;
1542
1543 armnn::TensorInfo inputTensorInfo =
1544 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1545 armnn::TensorInfo outputTensorInfo =
1546 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1547 armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1548 ArmnnType);
1549 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1550
1551 // Set quantization parameters if the requested type is a quantized type.
1552 if(armnn::IsQuantizedType<T>())
1553 {
1554 inputTensorInfo.SetQuantizationScale(qScale);
1555 inputTensorInfo.SetQuantizationOffset(qOffset);
1556 outputTensorInfo.SetQuantizationScale(qScale);
1557 outputTensorInfo.SetQuantizationOffset(qOffset);
1558 kernelDesc.SetQuantizationScale(qScale);
1559 kernelDesc.SetQuantizationOffset(qOffset);
1560 biasDesc.SetQuantizationScale(qScale*qScale);
1561 biasDesc.SetQuantizationOffset(0);
1562 }
1563 std::vector<T> inputData = std::vector<T>(
1564 QuantizedVector<T>({
1565 1.f, 2.f, 1.f,
1566 2.f, 1.f, 2.f,
1567 1.f, 2.f, 1.f,
1568
1569 1.f, 2.f, 1.f,
1570 2.f, 1.f, 2.f,
1571 1.f, 2.f, 1.f,
1572 },
1573 inputTensorInfo.GetQuantizationScale(),
1574 inputTensorInfo.GetQuantizationOffset()));
1575
1576 // At this point, permute the input data if the requested layout is NHWC.
1577 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1578 if (layout == armnn::DataLayout::NHWC)
1579 {
1580 std::vector<T> tmp(inputData.size());
1581 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1582 inputData = tmp;
1583 }
1584 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1585
1586 std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1587 biasDesc.GetQuantizationScale(),
1588 biasDesc.GetQuantizationOffset()));
1589
1590 auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1591
1592 std::vector<T> kernelData = std::vector<T>(
1593 QuantizedVector<T>({
1594 1.f, 0.f, 1.f,
1595 0.f, 0.f, 0.f,
1596 -1.f, 0.f, -1.f,
1597
1598 1.f, 0.f, 1.f,
1599 0.f, 0.f, 0.f,
1600 -1.f, 0.f, -1.f,
1601 },
1602 kernelDesc.GetQuantizationScale(),
1603 kernelDesc.GetQuantizationOffset()));
1604
1605 auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1606
1607 // Manually calculated.
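    // Each channel's 3x3 window exactly covers the input, and the kernel's
    // +1 top row cancels its -1 bottom row over the symmetric input:
    // (1*1 + 1*1) + (-1*1 + -1*1) = 0 for both channels.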
1608 std::vector<T> outputImage(
1609 QuantizedVector<T>({ 0.f, 0.f },
1610 outputTensorInfo.GetQuantizationScale(),
1611 outputTensorInfo.GetQuantizationOffset())
1612 );
1613
1614 // Optionally apply bias to output image.
1615 if(biasEnabled)
1616 {
1617 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1618 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1619 outputWidth, outputHeight);
1620 }
1621
1622 LayerTestResult<T, 4> ret(outputTensorInfo);
1623 if (layout == armnn::DataLayout::NHWC)
1624 {
1625 std::vector<T> tmp(outputImage.size());
1626 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1627 outputImage = tmp;
1628 }
1629
1630 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1631
1632 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1633 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1634
1635 armnn::DepthwiseConvolution2dQueueDescriptor data;
1636 armnn::WorkloadInfo info;
1637 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1638 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1639
1640 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1641 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1642
1643 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1644 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1645
1646 data.m_Weight = &weightsTensor;
1647 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1648 data.m_Parameters.m_StrideX = 1;
1649 data.m_Parameters.m_StrideY = 1;
1650 data.m_Parameters.m_PadLeft = 0;
1651 data.m_Parameters.m_PadRight = 0;
1652 data.m_Parameters.m_PadTop = 0;
1653 data.m_Parameters.m_PadBottom = 0;
1654 data.m_Parameters.m_BiasEnabled = biasEnabled;
1655 data.m_Parameters.m_DataLayout = layout;
1656
1657 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1658 inputHandle->Allocate();
1659 outputHandle->Allocate();
1660
1661 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1662
1663 ExecuteWorkload(*workload, memoryManager);
1664
1665 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1666
1667 return ret;
1668}
1669
1670template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1671LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1672 armnn::IWorkloadFactory& workloadFactory,
1673 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1674 float qScale,
1675 int32_t qOffset,
1676 bool biasEnabled,
1677 const armnn::DataLayout layout)
1678{
1679 using B = armnn::ResolveType<ArmnnBType>;
1680
1681 unsigned int depthMultiplier = 2;
1682
1683 unsigned int inputHeight = 8;
1684 unsigned int inputWidth = 16;
1685 unsigned int inputChannels = 2;
1686 unsigned int inputBatchSize = 1;
1687
1688 unsigned int kernelHeight = 5;
1689 unsigned int kernelWidth = 3;
1690
1691 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1692 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1693 unsigned int outputChannels = inputChannels * depthMultiplier;
1694 unsigned int outputBatchSize = inputBatchSize;
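    // These match (in + padBefore + padAfter - kernel)/stride + 1 for the
    // parameters set below: height (stride 1, pad 1+1) = 8 - 5 + 1 + 2 = 6,
    // width (stride 2, no pad) = (16 - 3 + 1) / 2 = 7.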
1695
1696 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1697 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1698 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1699 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1700 armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1701 ArmnnType);
1702 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1703
1704 // Set quantization parameters if the requested type is a quantized type.
1705 if(armnn::IsQuantizedType<T>())
1706 {
1707 inputTensorInfo.SetQuantizationScale(qScale);
1708 inputTensorInfo.SetQuantizationOffset(qOffset);
1709 outputTensorInfo.SetQuantizationScale(qScale);
1710 outputTensorInfo.SetQuantizationOffset(qOffset);
1711 kernelDesc.SetQuantizationScale(qScale);
1712 kernelDesc.SetQuantizationOffset(qOffset);
1713 biasDesc.SetQuantizationScale(qScale*qScale);
1714 biasDesc.SetQuantizationOffset(0);
1715 }
1716
1717 // NOTE: originalInputData is in NCHW format
1718 std::vector<T> originalInputData = std::vector<T>(
1719 QuantizedVector<T>({
1720 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1721 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1722 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1723 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1724 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1725 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1726 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1727 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1728 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1729 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1730 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1731 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1732 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1733 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1734 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1735 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1736 },
1737 inputTensorInfo.GetQuantizationScale(),
1738 inputTensorInfo.GetQuantizationOffset()));
1739
1740 std::vector<T> inputData = originalInputData;
1741 // At this point, permute the input data if the requested layout is NHWC.
1742 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1743 if (layout == armnn::DataLayout::NHWC)
1744 {
1745 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1746 originalInputData.data(), inputData.data(), sizeof(T));
1747 }
1748 auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1749
1750 std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1751 biasDesc.GetQuantizationScale(),
1752 biasDesc.GetQuantizationOffset());
1753
1754 auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1755
1756 std::vector<T> kernelData = std::vector<T>(
1757 QuantizedVector<T>({
1758 1, 1, 1,
1759 1, -1, 1,
1760 1, 1, 1,
1761 1, 1, 1,
1762 1, 1, 1,
1763
1764 2, 2, 2,
1765 2, 2, 2,
1766 2, 2, 2,
1767 2, 2, 2,
1768 2, 2, 2,
1769
1770 0, 0, 0,
1771 0, -1, 0,
1772 0, 0, 0,
1773 0, 0, 0,
1774 0, 0, 0,
1775
1776 0, 0, 0,
1777 0, 0, 0,
1778 0, 1, 0,
1779 0, 0, 0,
1780 0, 0, 0
1781 },
1782 kernelDesc.GetQuantizationScale(),
1783 kernelDesc.GetQuantizationOffset()));
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01001784
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01001785 auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1786
1787 // Manually calculated.
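    // Output channels are ordered input-channel-major, multiplier-minor:
    // blocks 0-1 come from input channel 0, blocks 2-3 from channel 1.
    // E.g. block 1 is channel 0 seen through the kernel whose only tap is
    // -1, so each 0.5 input row yields -1 * 0.5 = -0.5.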
1788 std::vector<T> originalOutputImage = std::vector<T>(
1789 QuantizedVector<T>({
1790 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1791 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1792 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1793 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1794 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1795 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1796
1797 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1798 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1799 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1800 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1801 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1802 -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1803
1804 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1805 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1806 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810
1811 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1812 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1813 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1814 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1815 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1816 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1817 },
1818 outputTensorInfo.GetQuantizationScale(),
1819 outputTensorInfo.GetQuantizationOffset()));
1820
1821 // Optionally apply bias to output image.
1822 if(biasEnabled)
1823 {
1824 ApplyBias(originalOutputImage,
1825 outputTensorInfo.GetQuantizationScale(),
1826 outputTensorInfo.GetQuantizationOffset(),
1827 biasV,
1828 biasDesc.GetQuantizationScale(),
1829 biasDesc.GetQuantizationOffset(),
1830 outputWidth,
1831 outputHeight);
1832 }
1833
1834 LayerTestResult<T, 4> ret(outputTensorInfo);
1835 std::vector<T> outputImage = originalOutputImage;
1836 if (layout == armnn::DataLayout::NHWC)
1837 {
1838 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1839 originalOutputImage.data(), outputImage.data(), sizeof(T));
1840 }
1841
1842 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1843
1844 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1845 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1846
1847 armnn::DepthwiseConvolution2dQueueDescriptor data;
1848 armnn::WorkloadInfo info;
1849 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1850 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1851
1852 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1853 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1854
1855 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1856 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1857
1858 data.m_Weight = &weightsTensor;
1859 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1860 data.m_Parameters.m_StrideX = 2;
1861 data.m_Parameters.m_StrideY = 1;
1862 data.m_Parameters.m_PadLeft = 0;
1863 data.m_Parameters.m_PadRight = 0;
1864 data.m_Parameters.m_PadTop = 1;
1865 data.m_Parameters.m_PadBottom = 1;
1866 data.m_Parameters.m_BiasEnabled = biasEnabled;
1867 data.m_Parameters.m_DataLayout = layout;
1868
1869 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1870 inputHandle->Allocate();
1871 outputHandle->Allocate();
1872
1873 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1874
1875 ExecuteWorkload(*workload, memoryManager);
1876
1877 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1878
1879 return ret;
1880}
1881
1882template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1883 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1884LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1885 armnn::IWorkloadFactory& workloadFactory,
1886 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1887 const boost::multi_array<T, 4>& originalInput,
1888 const boost::multi_array<T, 4>& originalKernel,
1889 const boost::multi_array<B, 1>& bias,
1890 const boost::multi_array<T, 4>& originalOutputExpected,
1891 float qScale,
1892 int32_t qOffset,
1893 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1894 uint32_t padLeft = 0,
1895 uint32_t padTop = 0,
1896 uint32_t padRight = 0,
1897 uint32_t padBottom = 0,
1898 uint32_t strideX = 1,
1899 uint32_t strideY = 1,
1900 uint32_t dilationX = 1,
1901 uint32_t dilationY = 1)
1902{
1903 unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1904 unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1905 unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1906 unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1907
1908 unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1909 unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1910 unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1911 unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1912
1913 unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1914 unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1915 unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1916 unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1917
1918 bool biasEnabled = bias.size() > 0;
1919
1920 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1921 BOOST_ASSERT(inputNum == 1);
1922 BOOST_ASSERT(outputNum == 1);
1923
1924 // If a bias is used, its size must equal the number of output channels.
1925 BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1926
1927
1928 // Note these tensors will use two (identical) batches.
1929 armnn::TensorInfo inputTensorInfo =
1930 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1931 armnn::TensorInfo outputTensorInfo =
1932 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1933
1934 // For depthwise convolution the kernel is always in NCHW layout, regardless of the input/output data layout.
1935 armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1936
1937 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1938
1939 // Set quantization parameters if the requested type is a quantized type.
1940 if(armnn::IsQuantizedType<T>())
1941 {
1942 inputTensorInfo.SetQuantizationScale(qScale);
1943 inputTensorInfo.SetQuantizationOffset(qOffset);
1944 outputTensorInfo.SetQuantizationScale(qScale);
1945 outputTensorInfo.SetQuantizationOffset(qOffset);
1946 kernelDesc.SetQuantizationScale(qScale);
1947 kernelDesc.SetQuantizationOffset(qOffset);
1948 biasDesc.SetQuantizationScale(qScale*qScale);
1949 biasDesc.SetQuantizationOffset(0);
1950 }
1951
1952 LayerTestResult<T, 4> ret(outputTensorInfo);
1953
1954 // Construct input data
1955 std::vector<T> input;
1956 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1957 std::vector<T> inputData;
1958 inputData.insert(inputData.end(), input.begin(), input.end());
1959 inputData.insert(inputData.end(), input.begin(), input.end());
1960
1961 // At this point, permute the input data if the requested layout is NHWC.
1962 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1963 if (layout == armnn::DataLayout::NHWC)
1964 {
1965 std::vector<T> tmp(inputData.size());
1966 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1967 inputData = tmp;
1968 }
1969
1970 auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1971
1972 std::vector<T> output;
1973 output.assign(originalOutputExpected.data(),
1974 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1975
1976 // Apply bias to output data if it is enabled.
1977 if(biasEnabled)
1978 {
1979 std::vector<T> biasV;
1980 biasV.assign(bias.data(), bias.data() + outputChannels);
1981 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1982 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1983 outputWidth, outputHeight);
1984 }
1985
1986 // Construct expected output data
1987 std::vector<T> outputData;
1988 outputData.insert(outputData.end(), output.begin(), output.end());
1989 outputData.insert(outputData.end(), output.begin(), output.end());
1990
1991 // At this point, permute the expected output if the requested layout is NHWC.
1992 if (layout == armnn::DataLayout::NHWC)
1993 {
1994 std::vector<T> tmp(outputData.size());
1995 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1996 outputData = tmp;
1997 }
1998 ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1999
2000 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2001 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2002
2003 armnn::DepthwiseConvolution2dQueueDescriptor data;
2004 armnn::WorkloadInfo info;
2005 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2006 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2007
2008 boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2009 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2010
2011 if(biasEnabled)
2012 {
2013 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2014 }
2015
2016 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2017 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2018
2019 data.m_Weight = &weightsTensor;
2020 data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - handling of an unused bias tensor is a known source of bugs, so exercise it deliberately.
2021 data.m_Parameters.m_StrideX = strideX;
2022 data.m_Parameters.m_StrideY = strideY;
2023 data.m_Parameters.m_PadLeft = padLeft;
2024 data.m_Parameters.m_PadRight = padRight;
2025 data.m_Parameters.m_PadTop = padTop;
2026 data.m_Parameters.m_PadBottom = padBottom;
2027 data.m_Parameters.m_BiasEnabled = biasEnabled;
2028 data.m_Parameters.m_DataLayout = layout;
2029 data.m_Parameters.m_DilationX = dilationX;
2030 data.m_Parameters.m_DilationY = dilationY;
2031
2032 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2033 inputHandle->Allocate();
2034 outputHandle->Allocate();
2035
2036 CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2037
2038 ExecuteWorkload(*workload, memoryManager);
2039
2040 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2041
2042 return ret;
2043}
2044
2045template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2046 typename T = armnn::ResolveType<ArmnnType>>
2047LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2048 armnn::IWorkloadFactory& workloadFactory,
2049 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2050 float qScale,
2051 int32_t qOffset,
2052 bool biasEnabled,
2053 const armnn::DataLayout layout)
2054{
2055 // Use a single-batch 2-channel 5x5 image as input.
2056 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2057 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2058 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002059 0, 1, 2, 3, 4,
2060 5, 6, 7, 8, 9,
2061 10, 11, 12, 13, 14,
2062 15, 16, 17, 18, 19,
2063 20, 21, 22, 23, 24,
2064
2065 25, 26, 27, 28, 29,
2066 30, 31, 32, 33, 34,
2067 35, 36, 37, 38, 39,
2068 40, 41, 42, 43, 44,
2069 45, 46, 47, 48, 49
2070 },
2071 inputTensorInfo.GetQuantizationScale(),
2072 inputTensorInfo.GetQuantizationOffset())));
2073
2074 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2075 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2076 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2077 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002078 32, 31, 30, 29,
2079 28, 27, 26, 25,
2080 24, 23, 22, 21,
2081 20, 19, 18, 17,
2082
2083 16, 15, 14, 13,
2084 12, 11, 10, 9,
2085 8, 7, 6, 5,
2086 4, 3, 2, 1
2087 },
2088 kernelTensorInfo.GetQuantizationScale(),
2089 kernelTensorInfo.GetQuantizationOffset())));
2090
2091 // Expected output is 1 batch of a 2-channel 5x5 image.
2092 // Calculated using the python tensorflow library with strideX=1, strideY=1.
2093 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2094 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2095 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002096 1062, 1580, 1850, 1530, 1117,
2097 2140, 3108, 3500, 2842, 2042,
2098 3580, 5068, 5460, 4342, 3062,
2099 3618, 5072, 5390, 4248, 2971,
2100 3074, 4282, 4510, 3533, 2457,
2101
2102 1550, 2284, 2362, 1955, 1428,
2103 2910, 4206, 4342, 3528, 2536,
2104 3390, 4886, 5022, 4068, 2916,
2105 3566, 5056, 5182, 4133, 2922,
2106 3100, 4352, 4452, 3517, 2465
2107 },
2108 outputTensorInfo.GetQuantizationScale(),
2109 outputTensorInfo.GetQuantizationOffset())));
2110
2111 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2112 workloadFactory,
2113 memoryManager,
2114 input,
2115 kernel,
2116 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2117 expectedOutput,
2118 qScale,
2119 qOffset,
2120 layout,
2121 1, // Padding left.
2122 1, // Padding top.
2123 2, // Padding right.
2124 2, // Padding bottom.
2125 1, // strideX
2126 1); // strideY
2127}
2128
2129template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2130 typename T = armnn::ResolveType<ArmnnType>>
2131LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2132 armnn::IWorkloadFactory& workloadFactory,
2133 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2134 float qScale,
2135 int32_t qOffset,
2136 bool biasEnabled)
2137{
2138 auto layout = armnn::DataLayout::NHWC;
2139
2140 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2141 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2142 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002143 0, 1, 2, 3, 4,
2144 5, 6, 7, 8, 9,
2145 10, 11, 12, 13, 14,
2146 15, 16, 17, 18, 19,
2147 20, 21, 22, 23, 24,
2148
2149 25, 26, 27, 28, 29,
2150 30, 31, 32, 33, 34,
2151 35, 36, 37, 38, 39,
2152 40, 41, 42, 43, 44,
2153 45, 46, 47, 48, 49
2154 },
2155 inputTensorInfo.GetQuantizationScale(),
2156 inputTensorInfo.GetQuantizationOffset())));
2157
2158 armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2159 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2160 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002161 32, 31, 30, 29,
2162 28, 27, 26, 25,
2163 24, 23, 22, 21,
2164 20, 19, 18, 17,
2165
2166 16, 15, 14, 13,
2167 12, 11, 10, 9,
2168 8, 7, 6, 5,
2169 4, 3, 2, 1
2170 },
2171 kernelTensorInfo.GetQuantizationScale(),
2172 kernelTensorInfo.GetQuantizationOffset())));
2173
2174 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2175 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2176 QuantizedVector<T>({
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002177 1062, 1580, 1850, 1530, 1117,
2178 2140, 3108, 3500, 2842, 2042,
2179 3580, 5068, 5460, 4342, 3062,
2180 3618, 5072, 5390, 4248, 2971,
2181 3074, 4282, 4510, 3533, 2457,
2182
2183 1550, 2284, 2362, 1955, 1428,
2184 2910, 4206, 4342, 3528, 2536,
2185 3390, 4886, 5022, 4068, 2916,
2186 3566, 5056, 5182, 4133, 2922,
2187 3100, 4352, 4452, 3517, 2465
2188 },
2189 outputTensorInfo.GetQuantizationScale(),
2190 outputTensorInfo.GetQuantizationOffset())));
2191
2192 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2193 workloadFactory,
2194 memoryManager,
2195 input,
2196 kernel,
2197 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2198 expectedOutput,
2199 qScale,
2200 qOffset,
2201 layout,
2202 1, // Padding left.
2203 1, // Padding top.
2204 2, // Padding right.
2205 2, // Padding bottom.
2206 1, // strideX
2207 1); // strideY
2208}
2209
2210template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2211 typename T = armnn::ResolveType<ArmnnType>>
2212LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2213 armnn::IWorkloadFactory& workloadFactory,
2214 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2215 float qScale,
2216 int32_t qOffset,
2217 bool biasEnabled)
2218{
2219 auto layout = armnn::DataLayout::NHWC;
2220
2221 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2222 auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2223 QuantizedVector<T>({
2224 0, 0, 0, 0, 0, 0, 0, 0, 0,
2225 0, 0, 0, 0, 0, 0, 0, 0, 0,
2226 0, 0, 0, 0, 0, 0, 0, 0, 0,
2227 0, 0, 0, 1, 1, 1, 0, 0, 0,
2228 0, 0, 0, 1, 1, 1, 0, 0, 0,
2229 0, 0, 0, 1, 1, 1, 0, 0, 0,
2230 0, 0, 0, 0, 0, 0, 0, 0, 0,
2231 0, 0, 0, 0, 0, 0, 0, 0, 0,
2232 0, 0, 0, 0, 0, 0, 0, 0, 0
2233 },
2234 inputTensorInfo.GetQuantizationScale(),
2235 inputTensorInfo.GetQuantizationOffset())));
2236
2237 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2238 auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2239 QuantizedVector<T>({
2240 1, 2, 3,
2241 4, 5, 6,
2242 7, 8, 9
2243 },
2244 kernelTensorInfo.GetQuantizationScale(),
2245 kernelTensorInfo.GetQuantizationOffset())));
2246
2247 uint32_t padLeft = 0;
2248 uint32_t padTop = 0;
2249 uint32_t padRight = 0;
2250 uint32_t padBottom = 0;
2251 uint32_t strideX = 1;
2252 uint32_t strideY = 1;
2253 uint32_t dilationX = 3;
2254 uint32_t dilationY = 3;
2255
2256 // With a dilation of 3 the 3x3 kernel has an effective extent of 3*(3-1) + 1 = 7, so the 9x9 input reduces to a 3x3 output of all 5s.
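    // At every output position exactly one dilated tap - the kernel centre,
    // weight 5 - lands inside the 3x3 block of ones, hence a 3x3 output of 5s.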
2257 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2258 boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2259 QuantizedVector<T>({
2260 5, 5, 5,
2261 5, 5, 5,
2262 5, 5, 5
2263 },
2264 outputTensorInfo.GetQuantizationScale(),
2265 outputTensorInfo.GetQuantizationOffset())));
2266
2267 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2268 workloadFactory,
2269 memoryManager,
2270 input,
2271 kernel,
2272 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2273 expectedOutput,
2274 qScale,
2275 qOffset,
2276 layout,
2277 padLeft,
2278 padTop,
2279 padRight,
2280 padBottom,
2281 strideX,
2282 strideY,
2283 dilationX,
2284 dilationY);
2285}
2286
2287template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2288LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2289 armnn::IWorkloadFactory& workloadFactory,
2290 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2291 const std::vector<float>& inputNoQuantizedValues,
2292 armnn::TensorInfo& inputTensorInfo,
2293 const std::vector<float>& kernelNoQuantizedValues,
2294 armnn::TensorInfo& kernelTensorInfo,
2295 const std::vector<float>& outputExpectedNoQuantizedValues,
2296 armnn::TensorInfo& outputTensorInfo,
2297 uint32_t dilationX,
2298 uint32_t dilationY,
2299 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2300 bool biasEnabled = false)
2301{
2302 float qScale;
2303 int32_t qOffset;
2304 switch (ArmnnType)
2305 {
2306 case armnn::DataType::QAsymmU8:
2307 {
2308 qScale = 0.1f;
2309 qOffset = 128;
2310 break;
2311 }
2312 case armnn::DataType::QSymmS16:
2313 {
2314 qScale = 0.1f;
2315 qOffset = 0;
2316 break;
2317 }
2318 case armnn::DataType::Float32:
2319 default:
2320 {
2321 qScale = 0.f;
2322 qOffset = 0;
2323 break;
2324 }
2325 }
2326
2327 inputTensorInfo.SetQuantizationScale(qScale);
2328 inputTensorInfo.SetQuantizationOffset(qOffset);
2329 kernelTensorInfo.SetQuantizationScale(qScale);
2330 kernelTensorInfo.SetQuantizationOffset(qOffset);
2331 outputTensorInfo.SetQuantizationScale(qScale);
2332 outputTensorInfo.SetQuantizationOffset(qOffset);
2333
2334 auto input = MakeTensor<T, 4>(inputTensorInfo,
2335 std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2336 inputTensorInfo.GetQuantizationScale(),
2337 inputTensorInfo.GetQuantizationOffset())));
2338 auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2339 std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2340 kernelTensorInfo.GetQuantizationScale(),
2341 kernelTensorInfo.GetQuantizationOffset())));
2342 auto expectedOutput =
2343 MakeTensor<T, 4>(outputTensorInfo,
2344 std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2345 outputTensorInfo.GetQuantizationScale(),
2346 outputTensorInfo.GetQuantizationOffset())));
2347
2348 uint32_t padLeft = 0;
2349 uint32_t padTop = 0;
2350 uint32_t padRight = 0;
2351 uint32_t padBottom = 0;
2352 uint32_t strideX = 1;
2353 uint32_t strideY = 1;
2354
2355 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2356 workloadFactory,
2357 memoryManager,
2358 input,
2359 kernel,
2360 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2361 expectedOutput,
2362 qScale,
2363 qOffset,
2364 layout,
2365 padLeft,
2366 padTop,
2367 padRight,
2368 padBottom,
2369 strideX,
2370 strideY,
2371 dilationX,
2372 dilationY);
2373}
2374
2375template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2376LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2377 armnn::IWorkloadFactory& workloadFactory,
2378 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2379 bool biasEnabled,
2380 const armnn::DataLayout layout)
2381{
2382 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2383 std::vector<float> inputNoQuantizedValues =
2384 {
2385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2389 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2390 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2395 };
2396
2397 armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2398 std::vector<float> kernelNoQuantizedValues =
2399 {
2400 1, 2, 3,
2401 4, 5, 6,
2402 7, 8, 9
2403 };
2404
2405 // With a dilation of 3 the 3x3 kernel has an effective extent of 7x7,
2406 // so the output is 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
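    // For example, at output (0, 0) the dilated taps sample input rows
    // { 0, 3, 6 } and columns { 0, 3, 6 }; only (3, 6) falls inside the block
    // of ones, so the result is the kernel value at tap (1, 2), i.e. 6.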
2407 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2408 std::vector<float> outputExpectedNoQuantizedValues =
2409 {
2410 6., 5., 5., 5.,
2411 6., 5., 5., 5.,
2412 6., 5., 5., 5.,
2413 3., 2., 2., 2.
2414 };
2415
2416 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2417 workloadFactory,
2418 memoryManager,
2419 inputNoQuantizedValues,
2420 inputTensorInfo,
2421 kernelNoQuantizedValues,
2422 kernelTensorInfo,
2423 outputExpectedNoQuantizedValues,
2424 outputTensorInfo,
2425 3,
2426 3,
2427 layout,
2428 biasEnabled);
2429}
2430
2431template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2432LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2433 armnn::IWorkloadFactory& workloadFactory,
2434 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2435 bool biasEnabled,
2436 const armnn::DataLayout layout)
2437{
2438 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2439 std::vector<float> inputNoQuantizedValues =
2440 {
2441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2445 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2446 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2451
2452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2456 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2457 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2462 };
2463
2464 armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2465 std::vector<float> kernelNoQuantizedValues =
2466 {
2467 1, 2, 3,
2468 4, 5, 6,
2469 7, 8, 9,
2470
2471 1, 2, 3,
2472 4, 5, 6,
2473 7, 8, 9
2474 };
2475
2476 // With a dilation of 3 the 3x3 kernel has an effective extent of 7x7,
2477 // so the output is 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2478 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2479 std::vector<float> outputExpectedNoQuantizedValues =
2480 {
2481 6., 5., 5., 5.,
2482 6., 5., 5., 5.,
2483 6., 5., 5., 5.,
2484 3., 2., 2., 2.,
2485
2486 6., 5., 5., 5.,
2487 6., 5., 5., 5.,
2488 6., 5., 5., 5.,
2489 3., 2., 2., 2.
2490 };
2491
2492 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2493 workloadFactory,
2494 memoryManager,
2495 inputNoQuantizedValues,
2496 inputTensorInfo,
2497 kernelNoQuantizedValues,
2498 kernelTensorInfo,
2499 outputExpectedNoQuantizedValues,
2500 outputTensorInfo,
2501 3,
2502 3,
2503 layout,
2504 biasEnabled);
2505}
2506
2507template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2508LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2509 armnn::IWorkloadFactory& workloadFactory,
2510 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2511 bool biasEnabled,
2512 const armnn::DataLayout layout)
2513{
2514 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2515 std::vector<float> inputNoQuantizedValues =
2516 {
2517 10.0, 10.0, 10.0,
2518 10.0, 10.0, 10.0,
2519 10.0, 10.0, 10.0,
2520
2521 21.0, 22.0, 23.0,
2522 24.0, 25.0, 26.0,
2523 27.0, 28.0, 29.0
2524 };
2525
2526 armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2527
2528 std::vector<float> kernelNoQuantizedValues =
2529 {
2530 0.25f, 0.25f,
2531 0.25f, 0.25f,
2532
2533 0.25f, 0.25f,
2534 0.25f, 0.25f,
2535
2536 0.0f , 0.0f,
2537 0.0f , 0.1f,
2538
2539 0.0f , 0.0f,
2540 0.0f , 0.1f,
2541
2542 0.2f , 0.0f,
2543 0.0f , 0.0f,
2544
2545 0.2f , 0.0f,
2546 0.0f , 0.0f,
2547
2548 0.0f , 0.3f,
2549 0.0f , 0.0f,
2550
2551 0.0f , 0.3f,
2552 0.0f , 0.0f
2553 };
2554
2555 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2556 std::vector<float> outputExpectedNoQuantizedValues =
2557 {
2558 10.f, 10.f,
2559 10.f, 10.f,
2560
2561 1.f, 1.f,
2562 1.f, 1.f,
2563
2564 2.f, 2.f,
2565 2.f, 2.f,
2566
2567 3.f, 3.f,
2568 3.f, 3.f,
2569
2570 23.f, 24.f,
2571 26.f, 27.f,
2572
2573 2.5f, 2.6000001f,
2574 2.8f, 2.9f,
2575
2576 4.2000003f, 4.4f,
2577 4.8f, 5.f,
2578
2579 6.6000004f, 6.9f,
2580 7.5000005f, 7.8f
2581 };
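    // The eight blocks are ordered input-channel-major, multiplier-minor:
    // four from the channel of 10s, then four from the 21..29 channel.
    // E.g. 23.f = 0.25 * (21 + 22 + 24 + 25), and 2.5f / 2.6000001f are
    // 0.1 * 25 and 0.1 * 26 with float rounding.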
2582
2583
2584 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2585 workloadFactory,
2586 memoryManager,
2587 inputNoQuantizedValues,
2588 inputTensorInfo,
2589 kernelNoQuantizedValues,
2590 kernelTensorInfo,
2591 outputExpectedNoQuantizedValues,
2592 outputTensorInfo,
2593 1,
2594 1,
2595 layout,
2596 biasEnabled);
2597}
2598
2599template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2600LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2601 armnn::IWorkloadFactory& workloadFactory,
2602 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2603 bool biasEnabled,
2604 const armnn::DataLayout layout)
2605{
2606 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2607 std::vector<float> inputNoQuantizedValues =
2608 {
2609 10.0, 10.0, 10.0,
2610 10.0, 10.0, 10.0,
2611 10.0, 10.0, 10.0,
2612
2613 21.0, 22.0, 23.0,
2614 24.0, 25.0, 26.0,
2615 27.0, 28.0, 29.0
2616 };
2617
2618 armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2619
2620 std::vector<float> kernelNoQuantizedValues =
2621 {
2622 0.25f, 0.25f,
2623 0.25f, 0.25f,
2624
2625 0.2f , 0.0f,
2626 0.0f , 0.0f,
2627
2628 0.0f , 0.0f,
2629 0.0f , 0.1f,
2630
2631 0.0f , 0.3f,
2632 0.0f , 0.0f
2633
2634 };
2635
2636 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2637 std::vector<float> outputExpectedNoQuantizedValues =
2638 {
2639 10.f, 10.f,
2640 10.f, 10.f,
2641
2642 1.f, 1.f,
2643 1.f, 1.f,
2644
2645 4.2000003f, 4.4f,
2646 4.8f, 5.f,
2647
2648 6.6000004f, 6.9f,
2649 7.5000005f, 7.8f
2650 };
2651
2652
2653 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2654 workloadFactory,
2655 memoryManager,
2656 inputNoQuantizedValues,
2657 inputTensorInfo,
2658 kernelNoQuantizedValues,
2659 kernelTensorInfo,
2660 outputExpectedNoQuantizedValues,
2661 outputTensorInfo,
2662 1,
2663 1,
2664 layout,
2665 biasEnabled);
2666}
2667
2668template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2669LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2670 armnn::IWorkloadFactory& workloadFactory,
2671 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2672 armnn::IWorkloadFactory& refWorkloadFactory,
2673 const armnnUtils::DataLayoutIndexed& layout)
2674{
2675 unsigned int inputHeight = 8;
2676 unsigned int inputWidth = 16;
2677 unsigned int inputChannels = 3;
2678 unsigned int inputNum = 5;
2679
2680 unsigned int kernelHeight = 3;
2681 unsigned int kernelWidth = 3;
2682 unsigned int channelMultiplier = 1;
2683
2684 unsigned int strideX = 2;
2685 unsigned int strideY = 3;
2686 unsigned int padX = 1;
2687 unsigned int padY = 1;
2688
2689 unsigned int outputNum = inputNum;
2690 unsigned int outputChannels = inputChannels * channelMultiplier;
2691 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2692 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
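    // I.e. (in + padBefore + padAfter - kernel)/stride + 1 with truncation:
    // height = (8 + 1 + 1 - 3)/3 + 1 = 3, width = (16 + 1 + 1 - 3)/2 + 1 = 8.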
2693
2694 armnn::TensorInfo inputTensorInfo;
2695 armnn::TensorInfo outputTensorInfo;
2696 armnn::TensorInfo kernelDesc;
2697 armnn::TensorInfo biasDesc;
2698
2699
2700 std::vector<unsigned int> inputShape;
2701 std::vector<unsigned int> outputShape;
2702 std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2703 std::vector<unsigned int> biasShape{ outputChannels };
2704 switch (layout.GetDataLayout())
2705 {
2706 case armnn::DataLayout::NCHW:
2707 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2708 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2709 break;
2710 case armnn::DataLayout::NHWC:
2711 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2712 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2713 break;
2714 default:
2715 throw armnn::InvalidArgumentException("unknown data layout ["
2716 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2717 }
2718
2719 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2720 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2721 int32_t qOffset = 0;
2722
2723 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2724 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2725 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2726 biasDesc = armnn::TensorInfo(
2727 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2728
2729 LayerTestResult<T, 4> ret(outputTensorInfo);
2730
2731 auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2732 auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2733 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2734 biasDesc, 1028, 0.0f, 255.0f);
2735
2736 std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2737 std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2738
2739 armnn::DepthwiseConvolution2dQueueDescriptor data;
2740 armnn::WorkloadInfo info;
2741 armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2742 armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2743
2744 AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2745 AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2746
2747 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2748 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2749 data.m_Weight = &weightsTensor;
2750 data.m_Bias = &biasTensor;
2751 data.m_Parameters.m_StrideX = strideX;
2752 data.m_Parameters.m_StrideY = strideY;
2753 data.m_Parameters.m_PadLeft = padX;
2754 data.m_Parameters.m_PadRight = padX;
2755 data.m_Parameters.m_PadTop = padY;
2756 data.m_Parameters.m_PadBottom = padY;
2757 data.m_Parameters.m_BiasEnabled = true;
2758 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2759
2760 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2761 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2762
2763 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2764 armnn::WorkloadInfo refInfo = info;
2765 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2766 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2767
2768 std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2769 std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2770
2771 outputHandleRef->Allocate();
2772 inputHandleRef->Allocate();
2773
2774 inputHandle->Allocate();
2775 outputHandle->Allocate();
2776
2777 CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2778 CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2779
2780 ExecuteWorkload(*workload, memoryManager);
2781
2782 workloadRef->PostAllocationConfigure();
2783 workloadRef->Execute();
2784
2785 CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2786 CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2787
2788 return ret;
2789}
2790
2791//
2792// Explicit template specializations
2793//
2794
2795template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2796Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2797 armnn::IWorkloadFactory&,
2798 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2799 bool,
2800 armnn::DataLayout);
2801
2802template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2803Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2804 armnn::IWorkloadFactory&,
2805 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2806 bool,
2807 armnn::DataLayout);
2808
2809template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2810Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2811 armnn::IWorkloadFactory&,
2812 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2813 bool,
2814 armnn::DataLayout);
2815
2816template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2817Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2818 armnn::IWorkloadFactory&,
2819 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2820 bool,
2821 armnn::DataLayout);
2822
2823template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2824Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2825 armnn::IWorkloadFactory&,
2826 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2827 bool,
2828 armnn::DataLayout);
2829
2830template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2831Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2832 armnn::IWorkloadFactory&,
2833 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2834 bool,
2835 armnn::DataLayout);
2836
2837template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2838Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2839 armnn::IWorkloadFactory &workloadFactory,
2840 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2841 bool biasEnabled,
2842 const armnn::DataLayout layout);
2843
2844template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2845Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2846 armnn::IWorkloadFactory &workloadFactory,
2847 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2848 bool biasEnabled,
2849 const armnn::DataLayout layout);
2850
2851template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2852Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2853 armnn::IWorkloadFactory &workloadFactory,
2854 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2855 bool biasEnabled,
2856 const armnn::DataLayout layout);
2857
2858template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2859DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2860 armnn::IWorkloadFactory&,
2861 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2862 bool,
2863 armnn::DataLayout);
2864
Derek Lambertif90c56d2020-01-10 17:14:08 +00002865template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2866DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002867 armnn::IWorkloadFactory&,
2868 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2869 bool,
2870 armnn::DataLayout);
2871
Derek Lambertif90c56d2020-01-10 17:14:08 +00002872template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2873DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002874 armnn::IWorkloadFactory&,
2875 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2876 bool,
2877 armnn::DataLayout);
2878
2879template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2880DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2881 armnn::IWorkloadFactory&,
2882 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2883 bool,
2884 armnn::DataLayout);
2885
Derek Lambertif90c56d2020-01-10 17:14:08 +00002886template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2887DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002888 armnn::IWorkloadFactory&,
2889 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2890 bool,
2891 armnn::DataLayout);
2892
Derek Lambertif90c56d2020-01-10 17:14:08 +00002893template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2894DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar00d306e2019-08-28 18:08:46 +01002895 armnn::IWorkloadFactory&,
2896 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2897 bool,
2898 armnn::DataLayout);
2899
2900template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2901DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2902 armnn::IWorkloadFactory &workloadFactory,
2903 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2904 bool biasEnabled,
2905 const armnn::DataLayout layout);
2906
2907template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2908DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2909 armnn::IWorkloadFactory &workloadFactory,
2910 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2911 bool biasEnabled,
2912 const armnn::DataLayout layout);
2913
2914//
2915// Implementation functions
2916//
2917
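// Each wrapper below pins down the data type, the quantization parameters
// (scale/offset pairs such as 0.5f/50 for QAsymmU8) and, where relevant, the
// data layout, then forwards to the templated *TestCommon/*TestImpl helpers
// defined earlier in this file.
//
// A minimal sketch of how a backend test might drive one of these wrappers
// (hypothetical harness code; real suites go through the ARMNN_AUTO_TEST_CASE
// machinery with their backend's own workload factory):
//
//     armnn::RefWorkloadFactory factory;
//     LayerTestResult<float, 4> result =
//         SimpleConvolution2d3x5Test(factory, nullptr, /*biasEnabled=*/true,
//                                    armnn::DataLayout::NCHW);
//     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
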
LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        armnn::DataLayout::NHWC);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
        <armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution1dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);

    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

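    // Per-axis (per-channel) quantization: quantDimension selects the axis of
    // the kernel tensor that the scales run along. Here it is dimension 0 of
    // the { 3, 1, 1, 2 } kernel shape, i.e. one scale per output channel, so
    // three scales are supplied for the three output channels.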
    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

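    // Sanity check on the expected values, dequantizing by hand: real input is
    // 0.5 * (q - 128), so { 138, 108 } -> { 5, -10 }. For output channel 0 the
    // kernel scale is 0.5, giving real weights { 0.5, 1.0 }, hence
    // 5 * 0.5 + (-10) * 1.0 = -7.5; adding the bias 4 * 0.25 = 1.0 gives -6.5,
    // which requantizes (scale 1.0, offset 128) to 121. The other channels use
    // kernel scales 0.75 and 1.0 and come out as 118 and 115 respectively.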
    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(kernelInfo, kernelData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);
    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

LayerTestResult<float, 4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
    auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });

    std::vector<float> kernelData;
    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
    for (unsigned int i = 0; i < 64; ++i)
    {
        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
    }
    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
    auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);

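    // Every one of the 64 output channels applies the same { 1, -1, -1, 1 }
    // kernel to the single 2x2 input { 1, 2, 3, 4 }: 1 - 2 - 3 + 4 = 0, so the
    // expected output is all zeros.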
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
    auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<float, 1>(),
        expectedOutput,
        0.f,
        0,
        armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        false);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
    TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C

    const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
    const unsigned int quantDimension = 0;
    TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W

    const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
    constexpr unsigned int biasQuantDimension = 0;
    TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);

    std::vector<uint8_t> inputData =
    {
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130
    };

    std::vector<int8_t> kernelData =
    {
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4, 4
    };

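    // Hand-checked expectation: real input is 0.5 * (q - 128), so the two input
    // channels hold 0.5 and 1.0 everywhere, and each 2x2 window of unit weights
    // sums to 2.0 or 4.0. Combining each window sum with the matching per-channel
    // kernel scale from { 1.0, 0.5, 1.0, 0.5 } and the dequantized bias
    // { 2, 1, 2, 1 } gives real outputs { 4, 2, 6, 3 }, which requantize
    // (scale 1.0, offset 128) to { 132, 130, 134, 131 }.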
    std::vector<uint8_t> expectedOutputData =
    {
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131
    };

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;
    descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 0;
    descriptor.m_PadRight = 0;
    descriptor.m_PadTop = 0;
    descriptor.m_PadBottom = 0;
    descriptor.m_DilationX = 1;
    descriptor.m_DilationY = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout = layout;

    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedCpuTensorHandle weightTensor(kernelInfo);
    ScopedCpuTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    LayerTestResult<uint8_t, 4> ret(outputInfo);

    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);

    return ret;
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
        workloadFactory, memoryManager, refWorkloadFactory, layout);
}