Blame - src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp - ml/armnn

blob: 50ad667ddedbbd96a4a2bcc0641024f54e304e4c [file] [log] [blame]

Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1	//
				2	// Copyright © 2017 Arm Ltd. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include "Conv2dTestImpl.hpp"
				7
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	8	#include <QuantizeHelper.hpp>
Matteo Martincigh	e011d20	2019-11-28 11:35:47 +0000	[diff] [blame]	9	#include <armnnUtils/TensorUtils.hpp>
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	10
Jan Eilers	8eb2560	2020-03-09 12:13:48 +0000	[diff] [blame]	11	#include <armnn/utility/IgnoreUnused.hpp>
Matteo Martincigh	e011d20	2019-11-28 11:35:47 +0000	[diff] [blame]	12	#include <armnnUtils/DataLayoutIndexed.hpp>
				13	#include <armnnUtils/Permute.hpp>
				14
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	15	#include <backendsCommon/CpuTensorHandle.hpp>
				16
Aron Virginas-Tar	5edc881	2019-11-05 18:00:21 +0000	[diff] [blame]	17	#include <backendsCommon/test/DataLayoutUtils.hpp>
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	18	#include <backendsCommon/test/TensorCopyUtils.hpp>
				19	#include <backendsCommon/test/WorkloadTestUtils.hpp>
				20
				21	#include <test/TensorHelpers.hpp>
				22
				23	#include <boost/numeric/conversion/cast.hpp>
				24
				25	#include <string>
				26
				27	//
				28	// Static data
				29	//
				30
				31	// 2-channel bias used by a number of Conv2d tests.
				32	static std::vector<float> Bias2({0, 2});
				33
				34	static std::vector<float> Bias4({1, 2, 3, 4});
				35
				36	static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
				37
				38	// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
				39	static std::vector<float> ConvInput3x8x16({
				40	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				41	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				42	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				43	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				44	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				45	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				46	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				47	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				48	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				49	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				50	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				51	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				52	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				53	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				54	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				55	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				56	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				57	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				58	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				59	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				60	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				61	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				62	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
				63	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
				64	});
				65
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	66	using namespace armnnUtils;
				67
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	68	//
				69	// Helper templates
				70	//
				71
				72	// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
				73	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				74	boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
				75	{
				76	if(biasEnabled)
				77	{
				78	armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	79	boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	80	return bias;
				81	}
				82	else
				83	{
				84	return boost::multi_array<T, 1>();
				85	}
				86	}
				87
				88	// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
				89	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				90	boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
				91	{
				92	if(biasEnabled)
				93	{
				94	armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	95	boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	96	return bias;
				97	}
				98	else
				99	{
				100	return boost::multi_array<T, 1>();
				101	}
				102	}
				103
				104	// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
				105	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				106	boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
				107	{
				108	if(biasEnabled)
				109	{
				110	armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	111	boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	112	return bias;
				113	}
				114	else
				115	{
				116	return boost::multi_array<T, 1>();
				117	}
				118	}
				119
				120	// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
				121	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				122	boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
				123	{
				124	const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
				125	const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
				126	const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
				127
				128	switch (outputChannels)
				129	{
				130	case 2:
				131	default:
				132	{
				133	return GetBias2<ArmnnType>(biasEnabled, qScale);
				134	}
				135	case 4:
				136	{
				137	return GetBias4<ArmnnType>(biasEnabled, qScale);
				138	}
				139	case 8:
				140	{
				141	return GetBias8<ArmnnType>(biasEnabled, qScale);
				142	}
				143	}
				144	}
				145
				146	//
				147	// Implementation templates
				148	//
				149
				150	// Mapping from input type to bias type for fully connected layers.
				151	// float => float, uint8_t => int32_t
				152	template<typename T>
				153	struct FullyConnectedBiasTypeForInputType;
				154
				155	template<>
				156	struct FullyConnectedBiasTypeForInputType<float>
				157	{
				158	using Type = float;
				159	};
				160
				161	template<>
				162	struct FullyConnectedBiasTypeForInputType<uint8_t>
				163	{
				164	using Type = int32_t;
				165	};
				166
				167	// Modifies a std::vector in-place using a specified bias.
				168	template<typename T, typename B>
				169	void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
				170	const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
				171	{
				172	BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) \|\| (!armnn::IsQuantizedType<T>()),
				173	"Invalid type and parameter combination.");
				174	BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) \|\| (!armnn::IsQuantizedType<B>()),
				175	"Invalid type and parameter combination.");
				176
				177	// Note we need to dequantize and re-quantize the image value and the bias.
				178	for (uint32_t i = 0; i < bias.size(); ++i)
				179	{
				180	float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
				181	for (uint32_t y = 0; y < h; ++y)
				182	{
				183	for (uint32_t x = 0; x < w; ++x)
				184	{
				185	uint32_t offset = (i * h + y) * w + x;
				186	BOOST_ASSERT(offset < v.size());
				187	T& outRef = v[offset];
				188	float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
				189	outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
				190	}
				191	}
				192	}
				193	}
				194
				195	//
				196	// Convolution2d implementations
				197	//
				198
				199	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				200	typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
				201	LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
				202	armnn::IWorkloadFactory& workloadFactory,
				203	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				204	const boost::multi_array<T, 4>& originalInput,
				205	const boost::multi_array<T, 4>& originalKernel,
				206	const boost::multi_array<B, 1>& bias,
				207	const boost::multi_array<T, 4>& originalOutputExpected,
				208	float qScale,
				209	int32_t qOffset,
				210	const armnn::DataLayout layout = armnn::DataLayout::NCHW,
				211	uint32_t padLeft = 0,
				212	uint32_t padTop = 0,
				213	uint32_t padRight = 0,
				214	uint32_t padBottom = 0,
				215	uint32_t strideX = 1,
				216	uint32_t strideY = 1,
				217	uint32_t dilationX = 1,
				218	uint32_t dilationY = 1)
				219	{
Jan Eilers	8eb2560	2020-03-09 12:13:48 +0000	[diff] [blame]	220	armnn::IgnoreUnused(memoryManager);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	221	unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
				222	unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
				223	unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
				224	unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
				225
				226	unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
				227	unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
				228	unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
				229	unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
				230
				231	unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
				232	unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
				233	unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
				234	unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
				235
				236	bool biasEnabled = bias.size() > 0;
				237
				238	// This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
				239	BOOST_ASSERT(inputNum == 1);
				240	BOOST_ASSERT(outputNum == 1);
				241
				242	// If a bias is used, its size must equal the number of output channels.
				243	BOOST_ASSERT(!biasEnabled \|\| bias.size() == outputChannels);
				244
				245
				246	// Note these tensors will use two (identical) batches.
				247	armnn::TensorInfo inputTensorInfo =
				248	armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
				249	armnn::TensorInfo outputTensorInfo =
				250	armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
				251	armnn::TensorInfo kernelDesc =
				252	armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
				253	armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
				254
				255	// Set quantization parameters if the requested type is a quantized type.
				256	if(armnn::IsQuantizedType<T>())
				257	{
				258	inputTensorInfo.SetQuantizationScale(qScale);
				259	inputTensorInfo.SetQuantizationOffset(qOffset);
				260	outputTensorInfo.SetQuantizationScale(qScale);
				261	outputTensorInfo.SetQuantizationOffset(qOffset);
				262	kernelDesc.SetQuantizationScale(qScale);
				263	kernelDesc.SetQuantizationOffset(qOffset);
				264	biasDesc.SetQuantizationScale(qScale*qScale);
				265	biasDesc.SetQuantizationOffset(0);
				266	}
				267
				268	LayerTestResult<T, 4> ret(outputTensorInfo);
				269
				270	// Construct input data - two batches of the same input image.
				271	std::vector<T> inputImage;
				272	inputImage.assign(originalInput.data(), originalInput.data() + 1inputChannelsinputHeight*inputWidth);
				273	std::vector<T> inputData;
				274	inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
				275	inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
				276
				277	// at this point if we require it permute the input data
				278	const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
				279	if (layout == armnn::DataLayout::NHWC)
				280	{
				281	std::vector<T> tmp(inputData.size());
				282	armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
				283	inputData = tmp;
				284	}
				285
				286	auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
				287
				288	std::vector<T> outputImage;
				289	outputImage.assign(originalOutputExpected.data(),
				290	originalOutputExpected.data() + outputChannelsoutputHeightoutputWidth);
				291
				292	// Apply bias to output image if it is enabled.
				293	if(biasEnabled)
				294	{
				295	std::vector<T> biasV;
				296	biasV.assign(bias.data(), bias.data() + outputChannels);
				297	ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
				298	biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
				299	outputWidth, outputHeight);
				300	}
				301
				302	// Construct expected output data - two identical images.
				303	std::vector<T> outputData;
				304	outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
				305	outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
				306
				307	// at this point if we require it permute the expected output
				308	if (layout == armnn::DataLayout::NHWC)
				309	{
				310	std::vector<T> tmp(outputData.size());
				311	armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
				312	outputData = tmp;
				313	}
				314	ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
				315
				316	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				317	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				318
				319	armnn::Convolution2dQueueDescriptor data;
				320	armnn::WorkloadInfo info;
				321	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				322	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				323	// Permute the kernel if necessary
				324	boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
				325	if (layout == armnn::DataLayout::NHWC)
				326	{
				327	armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
				328	}
				329	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				330
				331	if(biasEnabled)
				332	{
				333	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				334	}
				335
				336	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				337	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				338
				339	data.m_Weight = &weightsTensor;
				340	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
				341	data.m_Parameters.m_StrideX = strideX;
				342	data.m_Parameters.m_StrideY = strideY;
				343	data.m_Parameters.m_PadLeft = padLeft;
				344	data.m_Parameters.m_PadRight = padRight;
				345	data.m_Parameters.m_PadTop = padTop;
				346	data.m_Parameters.m_PadBottom = padBottom;
				347	data.m_Parameters.m_BiasEnabled = biasEnabled;
				348	data.m_Parameters.m_DataLayout = layout;
				349	data.m_Parameters.m_DilationX = dilationX;
				350	data.m_Parameters.m_DilationY = dilationY;
				351
				352	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
				353	inputHandle->Allocate();
				354	outputHandle->Allocate();
				355
				356	CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
				357
				358	ExecuteWorkload(*workload, memoryManager);
				359
				360	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				361
				362	return ret;
				363	}
				364
				365	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	366	typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
				367	armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
				368	LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	369	armnn::IWorkloadFactory& workloadFactory,
				370	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				371	const boost::multi_array<T, 4>& input,
				372	const boost::multi_array<T, 4>& kernel,
				373	const boost::multi_array<B, 1>& bias,
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	374	const boost::multi_array<O, 4>& outputExpected,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	375	const armnn::DataLayout dataLayout,
				376	float qScale,
				377	int32_t qOffset,
				378	uint32_t padLeft = 1,
				379	uint32_t padTop = 1,
				380	uint32_t padRight = 1,
				381	uint32_t padBottom = 1,
				382	uint32_t strideX = 1,
				383	uint32_t strideY = 1)
				384	{
Jan Eilers	8eb2560	2020-03-09 12:13:48 +0000	[diff] [blame]	385	armnn::IgnoreUnused(qScale, qOffset);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	386	unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
				387	unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
				388	unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
				389	unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
				390
				391	unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
				392	unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
				393	unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
				394	unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
				395
				396	unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
				397	unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
				398	unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
				399	unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
				400
				401	bool biasEnabled = bias.size() > 0;
				402
				403	// Creates the tensors.
				404	armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
				405	armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	406	OutType);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	407	armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
				408	armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
				409
				410	// Construct the input data.
				411	std::vector<T> inputData;
				412	inputData.assign(input.data(), input.data() + inputHeightinputWidthinputChannels);
				413	auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
				414
				415	// Construct the output data, with bias applied, as appropriate.
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	416	std::vector<O> outputData;
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	417	outputData.assign(outputExpected.data(), outputExpected.data() + outputHeightoutputWidthoutputChannels);
				418
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	419	LayerTestResult<O, 4> ret(outputTensorInfo);
				420	ret.outputExpected = MakeTensor<O, 4>(outputTensorInfo, outputData);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	421
				422	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				423	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				424
				425	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				426	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				427
				428	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				429
				430	armnn::Convolution2dQueueDescriptor data;
				431
				432	data.m_Weight = &weightsTensor;
				433	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
				434	data.m_Parameters.m_StrideX = strideX;
				435	data.m_Parameters.m_StrideY = strideY;
				436	data.m_Parameters.m_PadLeft = padLeft;
				437	data.m_Parameters.m_PadRight = padRight;
				438	data.m_Parameters.m_PadTop = padTop;
				439	data.m_Parameters.m_PadBottom = padBottom;
				440	data.m_Parameters.m_BiasEnabled = biasEnabled;
				441	data.m_Parameters.m_DataLayout = dataLayout;
				442
				443	armnn::WorkloadInfo info;
				444	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				445	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				446
				447	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
				448	inputHandle->Allocate();
				449	outputHandle->Allocate();
				450
				451	CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
				452
				453	ExecuteWorkload(*workload, memoryManager);
				454
				455	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				456
				457	return ret;
				458	}
				459
				460	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				461	LayerTestResult<T,4> Convolution1dTestImpl(
				462	armnn::IWorkloadFactory& workloadFactory,
				463	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				464	float qScale,
				465	int32_t qOffset,
				466	bool biasEnabled)
				467	{
				468	using B = armnn::ResolveType<ArmnnBType>;
				469	// Until we have a specialist 1D convolution layer, we can fake one using
				470	// 2D convolution with the final dimension set to 1.
				471	// I don't anticipate this being particularly slow, given that convolution is implemented
				472	// as a matrix multiplication, at which point dimension doesn't matter.
				473
				474	unsigned int batchSize = 1;
				475	unsigned int inputChannels = 2;
				476	unsigned int outputChannels = 3;
				477	unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
				478	unsigned int kernelSize = 3;
				479	unsigned int padSize = 2;
				480	unsigned int stride = 1;
				481	unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
				482
				483	armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
				484	armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
				485	armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
				486	armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
				487
				488	// Set quantization parameters if the requested type is a quantized type.
				489	if(armnn::IsQuantizedType<T>())
				490	{
				491	inputInfo.SetQuantizationScale(qScale);
				492	inputInfo.SetQuantizationOffset(qOffset);
				493	outputInfo.SetQuantizationScale(qScale);
				494	outputInfo.SetQuantizationOffset(qOffset);
				495	kernelInfo.SetQuantizationScale(qScale);
				496	kernelInfo.SetQuantizationOffset(qOffset);
				497	biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
				498	biasInfo.SetQuantizationOffset(0);
				499	}
				500
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	501	std::vector<T> inputData = QuantizedVector<T>(
				502	{
				503	5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
				504	-3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
				505	},
				506	inputInfo.GetQuantizationScale(),
				507	inputInfo.GetQuantizationOffset());
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	508
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	509	std::vector<T> kernelData = QuantizedVector<T>(
				510	{
				511	1.0f, 0.0f, 0.0f,
				512	0.0f, 2.0f, -1.5f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	513
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	514	0.0f, 0.0f, 0.0f,
				515	0.2f, 0.2f, 0.2f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	516
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	517	0.5f, 0.0f, 0.5f,
				518	0.0f, -1.0f, 0.0f
				519	},
				520	kernelInfo.GetQuantizationScale(),
				521	kernelInfo.GetQuantizationOffset());
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	522
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	523	std::vector<B> biasData =
				524	QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	525
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	526	std::vector<T> outputData = QuantizedVector<T>(
				527	{
				528	4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	529	-0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	530	2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
				531	},
				532	outputInfo.GetQuantizationScale(),
				533	outputInfo.GetQuantizationOffset());
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	534
				535	// Optionally apply bias to output image.
				536	if(biasEnabled)
				537	{
				538	ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
				539	biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
				540	1, outputSize);
				541	}
				542
				543	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
				544	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
				545
				546	armnn::Convolution2dQueueDescriptor data;
				547	armnn::WorkloadInfo info;
				548	armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
				549	armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
				550
				551	AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
				552	AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
				553
				554	AddInputToWorkload(data, info, inputInfo, inputHandle.get());
				555	AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
				556
				557	data.m_Weight = &weightsTensor;
				558	data.m_Bias = &biasTensor;
				559	data.m_Parameters.m_StrideX = 1;
				560	data.m_Parameters.m_StrideY = stride;
				561	data.m_Parameters.m_PadLeft = 0;
				562	data.m_Parameters.m_PadRight = 0;
				563	data.m_Parameters.m_PadTop = padSize;
				564	data.m_Parameters.m_PadBottom = padSize;
				565	data.m_Parameters.m_BiasEnabled = biasEnabled;
				566
				567	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
				568	inputHandle->Allocate();
				569	outputHandle->Allocate();
				570
				571	CopyDataToITensorHandle(inputHandle.get(), inputData.data());
				572
				573	ExecuteWorkload(*workload, memoryManager);
				574
				575	// Output
				576	LayerTestResult<T,4> ret(outputInfo);
				577	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				578	ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
				579	return ret;
				580	}
				581
				582	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				583	LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
				584	armnn::IWorkloadFactory& workloadFactory,
				585	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				586	float qScale,
				587	int32_t qOffset,
				588	bool biasEnabled,
				589	armnn::DataLayout dataLayout)
				590	{
Jan Eilers	8eb2560	2020-03-09 12:13:48 +0000	[diff] [blame]	591	armnn::IgnoreUnused(biasEnabled);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	592	// Use common single-batch 5x5 image.
				593
				594	armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
				595	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
				596	{
				597	1, 5, 2, 3,
				598	8, 7, 3, 6,
				599	3, 3, 9, 1
				600	});
				601
				602
				603	// Use a 2-element batch of 3-channel 3x3 kernels.
				604	armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
				605	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
				606	4, 5, 6,
				607	0, 0, 0,
				608	3, 2, 1
				609	});
				610
				611	// Expected output is 1 batch of a 5x5 image.
				612	armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
				613
				614	const std::vector<float> outputData =
				615	{
				616	23, 41, 33, 21,
				617	44, 65, 76, 52,
				618	82, 85, 79, 42
				619	};
				620
				621	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
				622
				623	return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
				624	workloadFactory,
				625	memoryManager,
				626	input,
				627	kernel,
				628	boost::multi_array<T, 1>(),
				629	expectedOutput,
				630	dataLayout,
				631	qScale,
				632	qOffset);
				633	}
				634
				635	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				636	LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
				637	armnn::IWorkloadFactory& workloadFactory,
				638	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				639	float qScale,
				640	int32_t qOffset,
				641	bool biasEnabled,
				642	const armnn::DataLayout& dataLayout)
				643	{
Jan Eilers	8eb2560	2020-03-09 12:13:48 +0000	[diff] [blame]	644	armnn::IgnoreUnused(biasEnabled);
Derek Lamberti	c374ff0	2019-12-10 21:57:35 +0000	[diff] [blame]	645
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	646	// Input is a single-batch, 1 channel, 5x5 image.
				647	armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
				648	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
				649	{
				650	1, 5, 2, 3, 5,
				651	8, 7, 3, 6, 3,
				652	3, 3, 9, 1, 9,
				653	4, 1, 8, 1, 3,
				654	6, 8, 1, 9, 2
				655	});
				656
				657	// Use a 3x3 kernel.
				658	armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
				659	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
				660	{
				661	4, 5, 6,
				662	0, 0, 0,
				663	3, 2, 1
				664	});
				665
				666	// Expected output is a single-batch, 1 channel, 3x3 image.
				667	armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
				668
				669	const std::vector<T> outputData =
				670	{
				671	23, 33, 24,
				672	91, 99, 48,
				673	26, 50, 19
				674	};
				675
				676	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
				677
				678	uint32_t padLeft = 1;
				679	uint32_t padTop = 1;
				680	uint32_t padRight = 1;
				681	uint32_t padBottom = 1;
				682	uint32_t strideX = 2;
				683	uint32_t strideY = 2;
				684
				685	return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
				686	workloadFactory,
				687	memoryManager,
				688	input,
				689	kernel,
				690	boost::multi_array<T, 1>(),
				691	expectedOutput,
				692	dataLayout,
				693	qScale,
				694	qOffset,
				695	padLeft,
				696	padTop,
				697	padRight,
				698	padBottom,
				699	strideX,
				700	strideY);
				701	}
				702
				703	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				704	LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
				705	armnn::IWorkloadFactory& workloadFactory,
				706	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				707	float qScale,
				708	int32_t qOffset,
				709	bool biasEnabled,
				710	const armnn::DataLayout layout)
				711	{
				712	// Use common single-batch 3-channel 16x8 image.
				713	armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	714	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	715
				716	// Use a 2-element batch with 3-channel 3x5 kernels.
				717	armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
				718	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	719	QuantizedVector<T>({
				720	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	721	1, -1, 1,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	722	1, 1, 1,
				723	1, 1, 1,
				724	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	725
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	726	0, 0, 0,
				727	0, 0, 0,
				728	0, 0, 0,
				729	0, 0, 0,
				730	0, 0, 0,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	731
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	732	2, 2, 2,
				733	2, 2, 2,
				734	2, 2, 2,
				735	2, 2, 2,
				736	2, 2, 2,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	737
				738
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	739	0, 0, 0,
				740	0, 0, 0,
				741	0, 0, 0,
				742	0, 0, 0,
				743	0, 0, 0,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	744
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	745	1, 1, 1,
				746	1, 1, 1,
				747	1, 1, 1,
				748	1, 1, 1,
				749	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	750
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	751	0, 0, 0,
				752	0, 0, 0,
				753	0, 0, 0,
				754	0, 0, 0,
				755	0, 0, 0
				756	},
				757	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	758
				759	// Expected output is 2 batch elements of a 1-channel 14x4 image.
				760	armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
				761	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	762	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	763	-24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
				764	-25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
				765	-23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
				766	-23.5f, -23.5f, -23.5f,
				767	-23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
				768	-23.5f, -23.5f, -23.5f,
				769
				770	5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				771	5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				772	5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				773	5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	774	},
				775	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	776
				777	return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				778	workloadFactory,
				779	memoryManager,
				780	input,
				781	kernel,
				782	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				783	expectedOutput,
				784	qScale,
				785	qOffset,
				786	layout);
				787	}
				788
				789	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				790	typename T = armnn::ResolveType<ArmnnType>>
				791	LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
				792	armnn::IWorkloadFactory& workloadFactory,
				793	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				794	float qScale,
				795	int32_t qOffset,
				796	bool biasEnabled,
				797	const armnn::DataLayout layout)
				798	{
				799	// Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
				800
				801	// Use common single-batch 3-channel 16x8 image.
				802	armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	803	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	804
				805	// Use a 2-element batch of 3-channel 3x3 kernels.
				806	armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
				807	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	808	QuantizedVector<T>({
				809	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	810	1, -1, 1,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	811	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	812
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	813	0, 0, 0,
				814	0, 0, 0,
				815	0, 0, 0,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	816
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	817	2, 2, 2,
				818	2, 2, 2,
				819	2, 2, 2,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	820
				821
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	822	0, 0, 0,
				823	0, 0, 0,
				824	0, 0, 0,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	825
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	826	1, 1, 1,
				827	1, 1, 1,
				828	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	829
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	830	0, 0, 0,
				831	0, 0, 0,
				832	0, 0, 0
				833	},
				834	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	835
				836	// Expected output is 1 batch of a 2-channel 14x6 image.
				837	armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
				838	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	839	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	840	-15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
				841	-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
				842	-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
				843	-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
				844	-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
				845	-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
				846
				847	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				848	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				849	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				850	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				851	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				852	3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	853	},
				854	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	855
				856	return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				857	workloadFactory,
				858	memoryManager,
				859	input,
				860	kernel,
				861	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				862	expectedOutput,
				863	qScale,
				864	qOffset,
				865	layout);
				866	}
				867
				868	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				869	typename T = armnn::ResolveType<ArmnnType>>
				870	LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
				871	armnn::IWorkloadFactory& workloadFactory,
				872	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				873	const armnn::DataLayout layout,
				874	float qScale,
				875	int32_t qOffset)
				876	{
				877	// Use a single-batch 1-channel 3x3 image as input.
				878	armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
				879	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	880	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	881	11,21,31,
				882	12,22,32,
				883	13,23,33
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	884	},
				885	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	886
				887	// Use 1 batch of a 1-channel 2x2 kernel.
				888	armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
				889	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	890	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	891	-11,-21,
				892	-12,-22,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	893	},
				894	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	895
				896	// Expected output is 1 batch of a 1-channel 6x8 image.
				897	// Manually calculated like this:
				898	//[-110 -210 -120 -220 ; -110 -210 -120 -220 ; -110 -210 -120 -220 ; -110 -210 -120 -220 ..]
				899	//[-110 -210 -120 -2211 ; -110 -210 -1211 -2221 ; -110 -210 -1221 -2231 ; -110 -210 -1231 -220 ..]
				900	//[-110 -2111 -120 -2212 ; -1111 -2121 -1212 -2222 ; -1121 -2131 -1222 -2232 ; -1131 -210 -1232 -220 ..]
				901	//[-110 -2112 -120 -2213 ; -1112 -2122 -1213 -2223 ; -1122 -2132 -1223 -2233 ; -1132 -210 -1233 -220 ..]
				902	//[-110 -2113 -120 -220 ; -1113 -2123 -120 -220 ; -1123 -2133 -120 -220 ; -1133 -210 -120 -220 ..]
				903	//[-110 -210 -120 -220 ; -110 -210 -120 -220 ; -110 -210 -120 -220 ; -110 -210 -120 -220 ..]
				904	//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
				905	armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
				906	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	907	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	908	0, 0, 0, 0, 0, 0,
				909	-242, -594, -934, -372, 0, 0,
				910	-495, -1190, -1850, -725, 0, 0,
				911	-538, -1256, -1916, -748, 0, 0,
				912	-273, -626, -946, -363, 0, 0,
				913	0, 0, 0, 0, 0, 0,
				914	0, 0, 0, 0, 0, 0,
				915	0, 0, 0, 0, 0, 0
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	916	},
				917	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	918
				919	return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				920	workloadFactory,
				921	memoryManager,
				922	input,
				923	kernel,
				924	GetBias2<ArmnnBType>(false, qScale * qScale),
				925	expectedOutput,
				926	qScale,
				927	qOffset,
				928	layout,
				929	1, // Padding left.
				930	2, // Padding top.
				931	3, // Padding right.
				932	4); // Padding bottom.
				933	}
				934
				935	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				936	typename T = armnn::ResolveType<ArmnnType>>
				937	LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
				938	armnn::IWorkloadFactory& workloadFactory,
				939	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				940	const armnn::DataLayout layout,
				941	float qScale,
				942	int32_t qOffset)
				943	{
				944	// Use a single-batch 1-channel 5x5 image as input.
				945	armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
				946	boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	947	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	948	11,21,31,41,51,
				949	12,22,32,42,52,
				950	13,23,33,43,53,
				951	14,24,34,44,54,
				952	15,25,35,45,55,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	953	}, qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	954
				955	// Use 1 batch of a 1-channel 4x4 kernel.
				956	armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
				957	boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	958	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	959	-11,-21,-31,-41,
				960	-12,-22,-32,-42,
				961	-13,-23,-33,-43,
				962	-14,-24,-34,-44,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	963	},
				964	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	965
				966	// Expected output is 1 batch of a 1-channel 5x5 image.
				967	armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
				968	std::vector<T> myVec(outputDesc.GetNumElements(), 0);
				969	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	970	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	971	-7140, -10580, -13940, -9300, -5230,
				972	-9590, -14120, -18520, -12290, -6860,
				973	-9980, -14560, -18960, -12560, -7000,
				974	-7518, -10904, -14144, -9318, -5152,
				975	-5032, -7256, -9376, -6142, -3368,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	976	},
				977	qScale, qOffset)));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	978
				979	return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				980	workloadFactory,
				981	memoryManager,
				982	input,
				983	kernel,
				984	GetBias2<ArmnnBType>(false, qScale * qScale),
				985	expectedOutput,
				986	qScale,
				987	qOffset,
				988	layout,
				989	1, // Padding left.
				990	1, // Padding top.
				991	2, // Padding right.
				992	2); // Padding bottom.
				993	}
				994
				995	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				996	LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
				997	armnn::IWorkloadFactory& workloadFactory,
				998	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				999	const std::vector<float>& inputNoQuantizedValues,
				1000	armnn::TensorInfo& inputTensorInfo,
				1001	const std::vector<float>& kernelNoQuantizedValues,
				1002	armnn::TensorInfo& kernelTensorInfo,
				1003	const std::vector<float>& outputExpectedNoQuantizedValues,
				1004	armnn::TensorInfo& outputTensorInfo,
				1005	uint32_t dilationX,
				1006	uint32_t dilationY,
				1007	armnn::DataLayout layout = armnn::DataLayout::NCHW,
				1008	uint32_t padLeft = 0,
				1009	uint32_t padTop = 0,
				1010	uint32_t padRight = 0,
				1011	uint32_t padBottom = 0,
				1012	uint32_t strideX = 1,
				1013	uint32_t strideY = 1,
				1014	bool biasEnabled = false
				1015	)
				1016	{
				1017	float qScale;
				1018	int32_t qOffset;
				1019	switch (ArmnnType)
				1020	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	1021	case armnn::DataType::QAsymmU8:
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1022	{
				1023	qScale = 0.1f;
				1024	qOffset = 128;
				1025	break;
				1026	}
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	1027	case armnn::DataType::QSymmS16:
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1028	{
				1029	qScale = 0.1f;
				1030	qOffset = 0;
				1031	break;
				1032	}
				1033	case armnn::DataType::Float32:
				1034	default:
				1035	{
				1036	qScale = 0.f;
				1037	qOffset = 0;
				1038	break;
				1039	}
				1040	}
				1041
				1042	inputTensorInfo.SetQuantizationScale(qScale);
				1043	inputTensorInfo.SetQuantizationOffset(qOffset);
				1044	kernelTensorInfo.SetQuantizationScale(qScale);
				1045	kernelTensorInfo.SetQuantizationOffset(qOffset);
				1046	outputTensorInfo.SetQuantizationScale(qScale);
				1047	outputTensorInfo.SetQuantizationOffset(qOffset);
				1048
				1049	auto input = MakeTensor<T, 4>(inputTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1050	std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
				1051	inputTensorInfo.GetQuantizationScale(),
				1052	inputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1053	auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1054	std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
				1055	kernelTensorInfo.GetQuantizationScale(),
				1056	kernelTensorInfo.GetQuantizationOffset())));
				1057	auto expectedOutput =
				1058	MakeTensor<T, 4>(outputTensorInfo,
				1059	std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
				1060	outputTensorInfo.GetQuantizationScale(),
				1061	outputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1062
				1063	return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				1064	workloadFactory,
				1065	memoryManager,
				1066	input,
				1067	kernel,
				1068	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				1069	expectedOutput,
				1070	qScale,
				1071	qOffset,
				1072	layout,
				1073	padLeft,
				1074	padTop,
				1075	padRight,
				1076	padBottom,
				1077	strideX,
				1078	strideY,
				1079	dilationX,
				1080	dilationY);
				1081	}
				1082
				1083	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				1084	LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
				1085	armnn::IWorkloadFactory& workloadFactory,
				1086	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1087	bool biasEnabled,
				1088	const armnn::DataLayout layout)
				1089	{
				1090	armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
				1091	std::vector<float> inputNoQuantizedValues =
				1092	{
				1093	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1094	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1095	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1096	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1097	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1098	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1099	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1100	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1101	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1102	0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				1103	};
				1104
				1105	armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
				1106	std::vector<float> kernelNoQuantizedValues =
				1107	{
				1108	1, 2, 3,
				1109	4, 5, 6,
				1110	7, 8, 9
				1111	};
				1112
				1113	// Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
				1114	// therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
				1115	armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
				1116	std::vector<float> outputExpectedNoQuantizedValues =
				1117	{
				1118	6., 5., 5., 5.,
				1119	6., 5., 5., 5.,
				1120	6., 5., 5., 5.,
				1121	3., 2., 2., 2.
				1122	};
				1123
				1124	return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				1125	workloadFactory,
				1126	memoryManager,
				1127	inputNoQuantizedValues,
				1128	inputTensorInfo,
				1129	kernelNoQuantizedValues,
				1130	kernelTensorInfo,
				1131	outputExpectedNoQuantizedValues,
				1132	outputTensorInfo,
				1133	3,
				1134	3,
				1135	layout,
				1136	biasEnabled);
				1137	}
				1138
				1139	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				1140	LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
				1141	armnn::IWorkloadFactory& workloadFactory,
				1142	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1143	bool biasEnabled,
				1144	const armnn::DataLayout layout)
				1145	{
				1146	armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
				1147	std::vector<float> inputNoQuantizedValues =
				1148	{
				1149	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1150	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1151	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1152	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1153	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1154	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1155	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1156	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1157	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1158	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1159
				1160	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1161	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1162	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1163	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1164	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1165	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				1166	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1167	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1168	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1169	0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				1170	};
				1171
				1172	armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
				1173	std::vector<float> kernelNoQuantizedValues =
				1174	{
				1175	1, 2, 3,
				1176	4, 5, 6,
				1177	7, 8, 9,
				1178
				1179	1, 2, 3,
				1180	4, 5, 6,
				1181	7, 8, 9
				1182	};
				1183
				1184	// Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
				1185	// therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
				1186	armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
				1187	std::vector<float> outputExpectedNoQuantizedValues =
				1188	{
				1189	12., 10., 10., 10.,
				1190	12., 10., 10., 10.,
				1191	12., 10., 10., 10.,
				1192	6., 4., 4., 4.
				1193	};
				1194
				1195	return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				1196	workloadFactory,
				1197	memoryManager,
				1198	inputNoQuantizedValues,
				1199	inputTensorInfo,
				1200	kernelNoQuantizedValues,
				1201	kernelTensorInfo,
				1202	outputExpectedNoQuantizedValues,
				1203	outputTensorInfo,
				1204	3,
				1205	3,
				1206	layout,
				1207	biasEnabled);
				1208	}
				1209
				1210	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				1211	LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
				1212	armnn::IWorkloadFactory &workloadFactory,
				1213	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				1214	bool biasEnabled,
				1215	const armnn::DataLayout layout)
				1216	{
				1217	armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
				1218	std::vector<float> inputNoQuantizedValues =
				1219	{
				1220	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1221	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1222	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1223	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1224	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1225	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1226	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1227	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1228	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				1229	1, 1, 1, 1, 1, 1, 1, 1, 1, 1
				1230	};
				1231
				1232	armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
				1233	std::vector<float> kernelNoQuantizedValues =
				1234	{
				1235	1, 2,
				1236	3, 4
				1237	};
				1238
				1239	// Since the dilation rate is 2 this will dilate the kernel to be like 3x3: d(K-1)+1 --> 2 x (2-1) + 1 = 3,
				1240	// therefore the output will be 4x4: (I − K + 2P)/S +1 => trunc ( (10 - 3 + 2x2 ) / 3 + 1 )
				1241	// where, dilation size = d = 2; kernel size = K = 2; input size = I = 10; padding size = P = 2; stride = S = 3
				1242	armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
				1243	std::vector<float> outputExpectedNoQuantizedValues =
				1244	{
				1245	4, 7, 7, 3,
				1246	6, 10, 10, 4,
				1247	6, 10, 10, 4,
				1248	2, 3, 3, 1
				1249	};
				1250	uint32_t padLeft = 1;
				1251	uint32_t padTop = 1;
				1252	uint32_t padRight = 1;
				1253	uint32_t padBottom = 1;
				1254
				1255	return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				1256	workloadFactory,
				1257	memoryManager,
				1258	inputNoQuantizedValues,
				1259	inputTensorInfo,
				1260	kernelNoQuantizedValues,
				1261	kernelTensorInfo,
				1262	outputExpectedNoQuantizedValues,
				1263	outputTensorInfo,
				1264	2,
				1265	2,
				1266	layout,
				1267	padLeft,
				1268	padTop,
				1269	padRight,
				1270	padBottom,
				1271	3,
				1272	3,
				1273	biasEnabled
				1274	);
				1275	}
				1276
				1277	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				1278	LayerTestResult<T,4> CompareConvolution2dTestImpl(
				1279	armnn::IWorkloadFactory& workloadFactory,
				1280	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1281	armnn::IWorkloadFactory& refWorkloadFactory)
				1282	{
				1283	unsigned int inputHeight = 8;
				1284	unsigned int inputWidth = 16;
				1285	unsigned int inputChannels = 3;
				1286	unsigned int inputNum = 5;
				1287
				1288	unsigned int kernelHeight = 3;
				1289	unsigned int kernelWidth = 3;
				1290
				1291	unsigned int strideX = 2;
				1292	unsigned int strideY = 3;
				1293	unsigned int padX = 1;
				1294	unsigned int padY = 1;
				1295
				1296	unsigned int outputNum = inputNum;
				1297	unsigned int outputChannels = 2;
				1298	unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
				1299	unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
				1300
				1301	armnn::TensorInfo inputTensorInfo;
				1302	armnn::TensorInfo outputTensorInfo;
				1303	armnn::TensorInfo kernelDesc;
				1304	armnn::TensorInfo biasDesc;
				1305
				1306	unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
				1307	unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
				1308	unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
				1309	unsigned int biasShape[] = {outputChannels};
				1310
				1311	inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
				1312	outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
				1313	kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
				1314	biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
				1315
				1316	LayerTestResult<T,4> ret(outputTensorInfo);
				1317
				1318	auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
				1319	auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
				1320	auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
				1321
				1322	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				1323	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				1324
				1325	armnn::Convolution2dQueueDescriptor data;
				1326	armnn::WorkloadInfo info;
				1327	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				1328	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				1329
				1330	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				1331	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				1332
				1333	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				1334	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				1335	data.m_Weight = &weightsTensor;
				1336	data.m_Bias = &biasTensor;
				1337	data.m_Parameters.m_StrideX = strideX;
				1338	data.m_Parameters.m_StrideY = strideY;
				1339	data.m_Parameters.m_PadLeft = padX;
				1340	data.m_Parameters.m_PadRight = padX;
				1341	data.m_Parameters.m_PadTop = padY;
				1342	data.m_Parameters.m_PadBottom = padY;
				1343	data.m_Parameters.m_BiasEnabled = true;
				1344
				1345	std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
				1346	std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
				1347
				1348	armnn::Convolution2dQueueDescriptor refData = data;
				1349	armnn::WorkloadInfo refInfo = info;
				1350	SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
				1351	SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
				1352
				1353	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
				1354	std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
				1355
				1356	outputHandleRef->Allocate();
				1357	inputHandleRef->Allocate();
				1358
				1359	inputHandle->Allocate();
				1360	outputHandle->Allocate();
				1361
				1362	CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
				1363	CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
				1364
				1365	ExecuteWorkload(*workload, memoryManager);
				1366
				1367	workloadRef->PostAllocationConfigure();
				1368	workloadRef->Execute();
				1369
				1370	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				1371	CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
				1372
				1373	return ret;
				1374	}
				1375
Narumol Prangnawarat	e8cddeb	2020-04-01 16:51:23 +0100	[diff] [blame^]	1376	LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
				1377	armnn::IWorkloadFactory& workloadFactory,
				1378	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1379	bool biasEnabled,
				1380	const armnn::DataLayout& dataLayout)
				1381	{
				1382	// BFloat16 input and weight, Float32 output
				1383	armnn::IgnoreUnused(biasEnabled);
				1384
				1385	// Input is a single-batch, 1 channel, 5x5 image.
				1386	armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
				1387
				1388	std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
				1389	{
				1390	10.0367984f, // 10.0625
				1391	2.0380895f, // 2.03125
				1392	15.0420157f, // 15.0625
				1393	22.0675631f, // 22.125
				1394	8.0938920f, // 8.125
				1395	5.0476106f, // 5.0625
				1396	80.1035490f, // 80
				1397	100.1260370f, // 100
				1398	55.0461647f, // 55
				1399	120.0883828f, // 120
				1400	9.1159540f, // 9.125
				1401	90.0498519f, // 90
				1402	200.0104630f, // 200
				1403	30.0154114f, // 30
				1404	75.00137681f, // 75
				1405	30.0344238f, // 30
				1406	25.0356445f, // 25
				1407	130.0495605f, // 130
				1408	60.0683594f, // 60
				1409	35.0991211f, // 35
				1410	8.0461426f, // 8.0625
				1411	12.0996094f, // 12.125
				1412	98.1269530f, // 98
				1413	125.0393066f, // 125
				1414	5.103516f // 5.0937
				1415	},
				1416	1.0f, 0);
				1417
				1418	auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
				1419
				1420	// Use a 3x3 kernel.
				1421	armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
				1422
				1423	std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
				1424	{
				1425	-0.126184f, // -0.125977
				1426	-0.150468f, // -0.150391
				1427	-0.101412f, // -0.101562
				1428	-0.0586369f,// -0.0585938
				1429	-0.0865864f,// -0.0864258
				1430	-0.0435089f,// -0.043457
				1431	0.0347555f, // 0.034668
				1432	0.0323111f, // 0.0322266
				1433	0.0385381f // 0.0385742
				1434	},
				1435	1.0f, 0);
				1436
				1437	auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
				1438
				1439	// Expected output is a single-batch, 1 channel, 3x3 image.
				1440	armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
				1441
				1442	// Expected output (with results if calculated as FP32 in the comments)
				1443	const std::vector<float> outputData =
				1444	{
				1445	2.296875f, // 2.29240716
				1446	5.75f, // 5.75851926
				1447	3.78125f, // 3.79855026
				1448	-11.625f, // -11.65498118
				1449	-47.25f, // -47.27316893
				1450	-30.0f, // -30.04771684
				1451	-8.25f, // -8.28126168
				1452	-43.5f, // -43.46531337
				1453	-20.625f // -20.63477281
				1454	};
				1455
				1456	boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
				1457
				1458	uint32_t padLeft = 1;
				1459	uint32_t padTop = 1;
				1460	uint32_t padRight = 1;
				1461	uint32_t padBottom = 1;
				1462	uint32_t strideX = 2;
				1463	uint32_t strideY = 2;
				1464
				1465	return SimpleConvolution2dNhwcTestImpl
				1466	<armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
				1467	workloadFactory,
				1468	memoryManager,
				1469	input,
				1470	kernel,
				1471	boost::multi_array<float, 1>(),
				1472	expectedOutput,
				1473	dataLayout,
				1474	1.0f,
				1475	0,
				1476	padLeft,
				1477	padTop,
				1478	padRight,
				1479	padBottom,
				1480	strideX,
				1481	strideY);
				1482	}
				1483
				1484	LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
				1485	armnn::IWorkloadFactory& workloadFactory,
				1486	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1487	bool biasEnabled,
				1488	const armnn::DataLayout& dataLayout)
				1489	{
				1490	// BFloat16 input and weight, Float32 output
				1491	armnn::IgnoreUnused(biasEnabled);
				1492
				1493	// Input is a single-batch, 1 channel, 5x5 image.
				1494	armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
				1495
				1496	std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
				1497	{
				1498	0.0367984f, // 0.0368652
				1499	0.0380895f, // 0.0380859
				1500	0.0420157f, // 0.0419922
				1501	0.0675631f, // 0.0673828
				1502	0.0938920f, // 0.09375
				1503	0.0476106f, // 0.0476074
				1504	0.1035490f, // 0.103516
				1505	0.1260370f, // 0.125977
				1506	0.0461647f, // 0.0461426
				1507	0.0883828f, // 0.0883789
				1508	0.1159540f, // 0.115723
				1509	0.0498519f, // 0.0498047
				1510	0.0104630f, // 0.010437
				1511	0.0154114f, // 0.0154419
				1512	0.00137681f, // 0.00137329
				1513	0.0344238f, // 0.0344616
				1514	0.0356445f, // 0.0355693
				1515	0.0495605f, // 0.0495018
				1516	0.0683594f, // 0.0683308
				1517	0.0991211f, // 0.0988837
				1518	0.0461426f, // 0.0461838
				1519	0.0996094f, // 0.0997546
				1520	0.1269530f, // 0.127099
				1521	0.0393066f, // 0.0392791
				1522	0.103516f // 0.103641
				1523	},
				1524	1.0f, 0);
				1525
				1526	auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
				1527
				1528	// Use a 3x3 kernel.
				1529	armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
				1530
				1531	std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
				1532	{
				1533	-0.126184f, // -0.125977
				1534	-0.150468f, // -0.150391
				1535	-0.101412f, // -0.101562
				1536	-0.0586369f,// -0.0585938
				1537	-0.0865864f,// -0.0864258
				1538	-0.0435089f,// -0.043457
				1539	0.0347555f, // 0.034668
				1540	0.0323111f, // 0.0322266
				1541	0.0385381f // 0.0385742
				1542	},
				1543	1.0f, 0);
				1544
				1545	auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
				1546
				1547	// Expected output is a single-batch, 1 channel, 3x3 image.
				1548	armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
				1549
				1550	// Expected output (with results if calculated as FP32 in the comments)
				1551	const std::vector<float> outputData =
				1552	{
				1553	0.000686645508f, // 0.000685
				1554	0.000640869141f, // 0.000639
				1555	-0.00759887695f, // -0.007631
				1556	-0.02734375f, // -0.027388
				1557	-0.0356445312f, // -0.035737
				1558	-0.0145874023f, // -0.014568
				1559	-0.0170898438f, // -0.017124
				1560	-0.0373535156f, // -0.037431
				1561	-0.0346679688f // -0.034808
				1562	};
				1563
				1564	boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
				1565
				1566	uint32_t padLeft = 1;
				1567	uint32_t padTop = 1;
				1568	uint32_t padRight = 1;
				1569	uint32_t padBottom = 1;
				1570	uint32_t strideX = 2;
				1571	uint32_t strideY = 2;
				1572
				1573	return SimpleConvolution2dNhwcTestImpl
				1574	<armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
				1575	workloadFactory,
				1576	memoryManager,
				1577	input,
				1578	kernel,
				1579	boost::multi_array<float, 1>(),
				1580	expectedOutput,
				1581	dataLayout,
				1582	1.0f,
				1583	0,
				1584	padLeft,
				1585	padTop,
				1586	padRight,
				1587	padBottom,
				1588	strideX,
				1589	strideY);
				1590	}
				1591
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1592	//
				1593	// DepthwiseConvolution2d implementations
				1594	//
				1595
				1596	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				1597	typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
				1598	LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
				1599	armnn::IWorkloadFactory& workloadFactory,
				1600	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1601	const boost::multi_array<T, 4>& input,
				1602	const boost::multi_array<T, 4>& kernel,
				1603	const boost::multi_array<B, 1>& bias,
				1604	const boost::multi_array<T, 4>& outputExpected,
				1605	float qScale,
				1606	int32_t qOffset,
				1607	const armnn::DataLayout layout,
				1608	uint32_t padLeft = 0,
				1609	uint32_t padTop = 0,
				1610	uint32_t padRight = 0,
				1611	uint32_t padBottom = 0,
				1612	uint32_t strideX = 1,
				1613	uint32_t strideY = 1)
				1614	{
				1615	unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
				1616	unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
				1617	unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
				1618	unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
				1619	unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
				1620	unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
				1621	unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
				1622	unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
				1623	unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
				1624	unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
				1625	unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
				1626	unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
				1627
				1628	// If a bias is used, its size must equal the number of output channels.
				1629	bool biasEnabled = bias.size() > 0;
				1630	BOOST_ASSERT(!biasEnabled \|\| bias.size() == outputChannels);
				1631
				1632	// Creates the tensors.
				1633	armnn::TensorInfo inputTensorInfo =
				1634	armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
				1635	armnn::TensorInfo outputTensorInfo =
				1636	armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
				1637	armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
				1638	armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
				1639
				1640	// Set quantization parameters if the requested type is a quantized type.
				1641	if (armnn::IsQuantizedType<T>())
				1642	{
				1643	inputTensorInfo.SetQuantizationScale(qScale);
				1644	inputTensorInfo.SetQuantizationOffset(qOffset);
				1645	outputTensorInfo.SetQuantizationScale(qScale);
				1646	outputTensorInfo.SetQuantizationOffset(qOffset);
				1647	kernelDesc.SetQuantizationScale(qScale);
				1648	kernelDesc.SetQuantizationOffset(qOffset);
				1649	biasDesc.SetQuantizationScale(qScale*qScale);
				1650	biasDesc.SetQuantizationOffset(0);
				1651	}
				1652
				1653	// Construct the input data.
				1654	std::vector<T> inputData;
				1655	inputData.assign(input.data(), input.data() + inputChannelsinputHeightinputWidth);
				1656
				1657	// At this point if we require it permute the input data
				1658	const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
				1659	if (layout == armnn::DataLayout::NHWC)
				1660	{
				1661	std::vector<T> tmp(inputData.size());
				1662	armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
				1663	inputData = tmp;
				1664	}
				1665
				1666	auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
				1667
				1668	// Construct the output data, with bias applied, as appropriate.
				1669	std::vector<T> outputData;
				1670	outputData.assign(outputExpected.data(), outputExpected.data() + outputChannelsoutputHeightoutputWidth);
				1671	if (biasEnabled)
				1672	{
				1673	std::vector<T> biasV;
				1674	biasV.assign(bias.data(), bias.data() + outputChannels);
				1675	ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
				1676	biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
				1677	outputWidth, outputHeight);
				1678	}
				1679
				1680	LayerTestResult<T, 4> ret(outputTensorInfo);
				1681
				1682	// At this point if we require it permute the expected output
				1683	if (layout == armnn::DataLayout::NHWC)
				1684	{
				1685	std::vector<T> tmp(outputData.size());
				1686	armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
				1687	outputData = tmp;
				1688	}
				1689
				1690	ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
				1691
				1692	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				1693	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				1694
				1695	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				1696
				1697	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				1698
				1699	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				1700	if (biasEnabled)
				1701	{
				1702	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				1703	}
				1704
				1705	armnn::DepthwiseConvolution2dQueueDescriptor data;
				1706	data.m_Weight = &weightsTensor;
				1707	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
				1708	data.m_Parameters.m_StrideX = strideX;
				1709	data.m_Parameters.m_StrideY = strideY;
				1710	data.m_Parameters.m_PadLeft = padLeft;
				1711	data.m_Parameters.m_PadRight = padRight;
				1712	data.m_Parameters.m_PadTop = padTop;
				1713	data.m_Parameters.m_PadBottom = padBottom;
				1714	data.m_Parameters.m_BiasEnabled = biasEnabled;
				1715	data.m_Parameters.m_DataLayout = layout;
				1716
				1717	armnn::WorkloadInfo info;
				1718	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				1719	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				1720
				1721	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
				1722	inputHandle->Allocate();
				1723	outputHandle->Allocate();
				1724
				1725	CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
				1726
				1727	ExecuteWorkload(*workload, memoryManager);
				1728
				1729	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				1730
				1731	return ret;
				1732	}
				1733
				1734	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				1735	LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
				1736	armnn::IWorkloadFactory& workloadFactory,
				1737	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1738	float qScale,
				1739	int32_t qOffset,
				1740	bool biasEnabled,
				1741	const armnn::DataLayout layout)
				1742	{
				1743	using B = armnn::ResolveType<ArmnnBType>;
				1744
				1745	unsigned int inputHeight = 3;
				1746	unsigned int inputWidth = 3;
				1747	unsigned int inputChannels = 2;
				1748	unsigned int inputNum = 1;
				1749
				1750	unsigned int kernelHeight = 3;
				1751	unsigned int kernelWidth = 3;
				1752	unsigned int kernelChannels = inputChannels;
				1753	unsigned int kernelDepthMultiplier = 1;
				1754
				1755	unsigned int outputHeight = 1;
				1756	unsigned int outputWidth = 1;
				1757	unsigned int outputChannels = kernelChannels;
				1758	unsigned int outputNum = inputNum;
				1759
				1760	armnn::TensorInfo inputTensorInfo =
				1761	armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
				1762	armnn::TensorInfo outputTensorInfo =
				1763	armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
				1764	armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
				1765	ArmnnType);
				1766	armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
				1767
				1768	// Set quantization parameters if the requested type is a quantized type.
				1769	if(armnn::IsQuantizedType<T>())
				1770	{
				1771	inputTensorInfo.SetQuantizationScale(qScale);
				1772	inputTensorInfo.SetQuantizationOffset(qOffset);
				1773	outputTensorInfo.SetQuantizationScale(qScale);
				1774	outputTensorInfo.SetQuantizationOffset(qOffset);
				1775	kernelDesc.SetQuantizationScale(qScale);
				1776	kernelDesc.SetQuantizationOffset(qOffset);
				1777	biasDesc.SetQuantizationScale(qScale*qScale);
				1778	biasDesc.SetQuantizationOffset(0);
				1779	}
				1780	std::vector<T> inputData = std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1781	QuantizedVector<T>({
				1782	1.f, 2.f, 1.f,
				1783	2.f, 1.f, 2.f,
				1784	1.f, 2.f, 1.f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1785
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1786	1.f, 2.f, 1.f,
				1787	2.f, 1.f, 2.f,
				1788	1.f, 2.f, 1.f,
				1789	},
				1790	inputTensorInfo.GetQuantizationScale(),
				1791	inputTensorInfo.GetQuantizationOffset()));
				1792
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1793	// at this point if we require it permute the input data
				1794	const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
				1795	if (layout == armnn::DataLayout::NHWC)
				1796	{
				1797	std::vector<T> tmp(inputData.size());
				1798	armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
				1799	inputData = tmp;
				1800	}
				1801	auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
				1802
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1803	std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
				1804	biasDesc.GetQuantizationScale(),
				1805	biasDesc.GetQuantizationOffset()));
				1806
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1807	auto bias = MakeTensor<B, 1>(biasDesc, biasV);
				1808
				1809	std::vector<T> kernelData = std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1810	QuantizedVector<T>({
				1811	1.f, 0.f, 1.f,
				1812	0.f, 0.f, 0.f,
				1813	-1.f, 0.f, -1.f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1814
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1815	1.f, 0.f, 1.f,
				1816	0.f, 0.f, 0.f,
				1817	-1.f, 0.f, -1.f,
				1818	},
				1819	kernelDesc.GetQuantizationScale(),
				1820	kernelDesc.GetQuantizationOffset()));
				1821
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1822	auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
				1823
				1824	// Manually calculated.
				1825	std::vector<T> outputImage(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1826	QuantizedVector<T>({ 0.f, 0.f },
				1827	outputTensorInfo.GetQuantizationScale(),
				1828	outputTensorInfo.GetQuantizationOffset())
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1829	);
				1830
				1831	// Optionally apply bias to output image.
				1832	if(biasEnabled)
				1833	{
				1834	ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
				1835	biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
				1836	outputWidth, outputHeight);
				1837	}
				1838
				1839	LayerTestResult<T, 4> ret(outputTensorInfo);
				1840	if (layout == armnn::DataLayout::NHWC)
				1841	{
				1842	std::vector<T> tmp(outputImage.size());
				1843	armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
				1844	outputImage = tmp;
				1845	}
				1846
				1847	ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
				1848
				1849	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				1850	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				1851
				1852	armnn::DepthwiseConvolution2dQueueDescriptor data;
				1853	armnn::WorkloadInfo info;
				1854	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				1855	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				1856
				1857	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				1858	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				1859
				1860	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				1861	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				1862
				1863	data.m_Weight = &weightsTensor;
				1864	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
				1865	data.m_Parameters.m_StrideX = 1;
				1866	data.m_Parameters.m_StrideY = 1;
				1867	data.m_Parameters.m_PadLeft = 0;
				1868	data.m_Parameters.m_PadRight = 0;
				1869	data.m_Parameters.m_PadTop = 0;
				1870	data.m_Parameters.m_PadBottom = 0;
				1871	data.m_Parameters.m_BiasEnabled = biasEnabled;
				1872	data.m_Parameters.m_DataLayout = layout;
				1873
				1874	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
				1875	inputHandle->Allocate();
				1876	outputHandle->Allocate();
				1877
				1878	CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
				1879
				1880	ExecuteWorkload(*workload, memoryManager);
				1881
				1882	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				1883
				1884	return ret;
				1885	}
				1886
				1887	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				1888	LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
				1889	armnn::IWorkloadFactory& workloadFactory,
				1890	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				1891	float qScale,
				1892	int32_t qOffset,
				1893	bool biasEnabled,
				1894	const armnn::DataLayout layout)
				1895	{
				1896	using B = armnn::ResolveType<ArmnnBType>;
				1897
				1898	unsigned int depthMultiplier = 2;
				1899
				1900	unsigned int inputHeight = 8;
				1901	unsigned int inputWidth = 16;
				1902	unsigned int inputChannels = 2;
				1903	unsigned int inputBatchSize = 1;
				1904
				1905	unsigned int kernelHeight = 5;
				1906	unsigned int kernelWidth = 3;
				1907
				1908	unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
				1909	unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
				1910	unsigned int outputChannels = inputChannels * depthMultiplier;
				1911	unsigned int outputBatchSize = inputBatchSize;
				1912
				1913	armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
				1914	inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
				1915	armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
				1916	outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
				1917	armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
				1918	ArmnnType);
				1919	armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
				1920
				1921	// Set quantization parameters if the requested type is a quantized type.
				1922	if(armnn::IsQuantizedType<T>())
				1923	{
				1924	inputTensorInfo.SetQuantizationScale(qScale);
				1925	inputTensorInfo.SetQuantizationOffset(qOffset);
				1926	outputTensorInfo.SetQuantizationScale(qScale);
				1927	outputTensorInfo.SetQuantizationOffset(qOffset);
				1928	kernelDesc.SetQuantizationScale(qScale);
				1929	kernelDesc.SetQuantizationOffset(qOffset);
				1930	biasDesc.SetQuantizationScale(qScale*qScale);
				1931	biasDesc.SetQuantizationOffset(0);
				1932	}
				1933
				1934	// NOTE: originalInputData is in NCHW format
				1935	std::vector<T> originalInputData = std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1936	QuantizedVector<T>({
				1937	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1938	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1939	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1940	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1941	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1942	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1943	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1944	0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
				1945	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1946	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1947	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1948	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1949	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1950	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1951	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				1952	0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
				1953	},
				1954	inputTensorInfo.GetQuantizationScale(),
				1955	inputTensorInfo.GetQuantizationOffset()));
				1956
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1957	std::vector<T> inputData = originalInputData;
				1958	// at this point if we require it permute the input data
				1959	const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
				1960	if (layout == armnn::DataLayout::NHWC)
				1961	{
				1962	armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
				1963	originalInputData.data(), inputData.data(), sizeof(T));
				1964	}
				1965	auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
				1966
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1967	std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
				1968	biasDesc.GetQuantizationScale(),
				1969	biasDesc.GetQuantizationOffset());
				1970
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1971	auto bias = MakeTensor<B, 1>(biasDesc, biasV);
				1972
				1973	std::vector<T> kernelData = std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1974	QuantizedVector<T>({
				1975	1, 1, 1,
				1976	1, -1, 1,
				1977	1, 1, 1,
				1978	1, 1, 1,
				1979	1, 1, 1,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1980
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1981	2, 2, 2,
				1982	2, 2, 2,
				1983	2, 2, 2,
				1984	2, 2, 2,
				1985	2, 2, 2,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1986
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1987	0, 0, 0,
				1988	0, -1, 0,
				1989	0, 0, 0,
				1990	0, 0, 0,
				1991	0, 0, 0,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	1992
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	1993	0, 0, 0,
				1994	0, 0, 0,
				1995	0, 1, 0,
				1996	0, 0, 0,
				1997	0, 0, 0
				1998	},
				1999	kernelDesc.GetQuantizationScale(),
				2000	kernelDesc.GetQuantizationOffset()));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2001
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2002	auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
				2003
				2004	// Manually calculated.
				2005	std::vector<T> originalOutputImage = std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2006	QuantizedVector<T>({
				2007	3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
				2008	6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
				2009	5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
				2010	6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
				2011	6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
				2012	5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2013
				2014	-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2015	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2016	-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
				2017	-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
				2018	-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
				2019	-0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
				2020
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2021	8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2022	10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2023	10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2024	10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2025	10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2026	8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2027
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2028	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2029	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2030	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2031	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2032	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
				2033	0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
				2034	},
				2035	outputTensorInfo.GetQuantizationScale(),
				2036	outputTensorInfo.GetQuantizationOffset()));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2037
				2038	// Optionally apply bias to output image.
				2039	if(biasEnabled)
				2040	{
				2041	ApplyBias(originalOutputImage,
				2042	outputTensorInfo.GetQuantizationScale(),
				2043	outputTensorInfo.GetQuantizationOffset(),
				2044	biasV,
				2045	biasDesc.GetQuantizationScale(),
				2046	biasDesc.GetQuantizationOffset(),
				2047	outputWidth,
				2048	outputHeight);
				2049	}
				2050
				2051	LayerTestResult<T, 4> ret(outputTensorInfo);
				2052	std::vector<T> outputImage = originalOutputImage;
				2053	if (layout == armnn::DataLayout::NHWC)
				2054	{
				2055	armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
				2056	originalOutputImage.data(), outputImage.data(), sizeof(T));
				2057	}
				2058
				2059	ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
				2060
				2061	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				2062	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				2063
				2064	armnn::DepthwiseConvolution2dQueueDescriptor data;
				2065	armnn::WorkloadInfo info;
				2066	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				2067	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				2068
				2069	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				2070	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				2071
				2072	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				2073	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				2074
				2075	data.m_Weight = &weightsTensor;
				2076	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
				2077	data.m_Parameters.m_StrideX = 2;
				2078	data.m_Parameters.m_StrideY = 1;
				2079	data.m_Parameters.m_PadLeft = 0;
				2080	data.m_Parameters.m_PadRight = 0;
				2081	data.m_Parameters.m_PadTop = 1;
				2082	data.m_Parameters.m_PadBottom = 1;
				2083	data.m_Parameters.m_BiasEnabled = biasEnabled;
				2084	data.m_Parameters.m_DataLayout = layout;
				2085
				2086	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
				2087	inputHandle->Allocate();
				2088	outputHandle->Allocate();
				2089
				2090	CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
				2091
				2092	ExecuteWorkload(*workload, memoryManager);
				2093
				2094	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				2095
				2096	return ret;
				2097	}
				2098
				2099	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				2100	typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
				2101	LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
				2102	armnn::IWorkloadFactory& workloadFactory,
				2103	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2104	const boost::multi_array<T, 4>& originalInput,
				2105	const boost::multi_array<T, 4>& originalKernel,
				2106	const boost::multi_array<B, 1>& bias,
				2107	const boost::multi_array<T, 4>& originalOutputExpected,
				2108	float qScale,
				2109	int32_t qOffset,
				2110	const armnn::DataLayout layout = armnn::DataLayout::NCHW,
				2111	uint32_t padLeft = 0,
				2112	uint32_t padTop = 0,
				2113	uint32_t padRight = 0,
				2114	uint32_t padBottom = 0,
				2115	uint32_t strideX = 1,
				2116	uint32_t strideY = 1,
				2117	uint32_t dilationX = 1,
				2118	uint32_t dilationY = 1)
				2119	{
				2120	unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
				2121	unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
				2122	unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
				2123	unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
				2124
				2125	unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
				2126	unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
				2127	unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
				2128	unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
				2129
				2130	unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
				2131	unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
				2132	unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
				2133	unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
				2134
				2135	bool biasEnabled = bias.size() > 0;
				2136
				2137	// This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
				2138	BOOST_ASSERT(inputNum == 1);
				2139	BOOST_ASSERT(outputNum == 1);
				2140
				2141	// If a bias is used, its size must equal the number of output channels.
				2142	BOOST_ASSERT(!biasEnabled \|\| bias.size() == outputChannels);
				2143
				2144
				2145	// Note these tensors will use two (identical) batches.
				2146	armnn::TensorInfo inputTensorInfo =
				2147	armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
				2148	armnn::TensorInfo outputTensorInfo =
				2149	armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
				2150
				2151	// Kernel must be NCHW layout always, independently of the layout of the input and output for depthwise convolution.
				2152	armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
				2153
				2154	armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
				2155
				2156	// Set quantization parameters if the requested type is a quantized type.
				2157	if(armnn::IsQuantizedType<T>())
				2158	{
				2159	inputTensorInfo.SetQuantizationScale(qScale);
				2160	inputTensorInfo.SetQuantizationOffset(qOffset);
				2161	outputTensorInfo.SetQuantizationScale(qScale);
				2162	outputTensorInfo.SetQuantizationOffset(qOffset);
				2163	kernelDesc.SetQuantizationScale(qScale);
				2164	kernelDesc.SetQuantizationOffset(qOffset);
				2165	biasDesc.SetQuantizationScale(qScale*qScale);
				2166	biasDesc.SetQuantizationOffset(0);
				2167	}
				2168
				2169	LayerTestResult<T, 4> ret(outputTensorInfo);
				2170
				2171	// Construct input data
				2172	std::vector<T> input;
				2173	input.assign(originalInput.data(), originalInput.data() + 1inputChannelsinputHeight*inputWidth);
				2174	std::vector<T> inputData;
				2175	inputData.insert(inputData.end(), input.begin(), input.end());
				2176	inputData.insert(inputData.end(), input.begin(), input.end());
				2177
				2178	// at this point if we require it permute the input data
				2179	const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
				2180	if (layout == armnn::DataLayout::NHWC)
				2181	{
				2182	std::vector<T> tmp(inputData.size());
				2183	armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
				2184	inputData = tmp;
				2185	}
				2186
				2187	auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
				2188
				2189	std::vector<T> output;
				2190	output.assign(originalOutputExpected.data(),
				2191	originalOutputExpected.data() + outputChannelsoutputHeightoutputWidth);
				2192
				2193	// Apply bias to output data if it is enabled.
				2194	if(biasEnabled)
				2195	{
				2196	std::vector<T> biasV;
				2197	biasV.assign(bias.data(), bias.data() + outputChannels);
				2198	ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
				2199	biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
				2200	outputWidth, outputHeight);
				2201	}
				2202
				2203	// Construct expected output data
				2204	std::vector<T> outputData;
				2205	outputData.insert(outputData.end(), output.begin(), output.end());
				2206	outputData.insert(outputData.end(), output.begin(), output.end());
				2207
				2208	// at this point if we require it permute the expected output
				2209	if (layout == armnn::DataLayout::NHWC)
				2210	{
				2211	std::vector<T> tmp(outputData.size());
				2212	armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
				2213	outputData = tmp;
				2214	}
				2215	ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
				2216
				2217	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				2218	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				2219
				2220	armnn::DepthwiseConvolution2dQueueDescriptor data;
				2221	armnn::WorkloadInfo info;
				2222	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				2223	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				2224
				2225	boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
				2226	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				2227
				2228	if(biasEnabled)
				2229	{
				2230	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				2231	}
				2232
				2233	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				2234	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				2235
				2236	data.m_Weight = &weightsTensor;
				2237	data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
				2238	data.m_Parameters.m_StrideX = strideX;
				2239	data.m_Parameters.m_StrideY = strideY;
				2240	data.m_Parameters.m_PadLeft = padLeft;
				2241	data.m_Parameters.m_PadRight = padRight;
				2242	data.m_Parameters.m_PadTop = padTop;
				2243	data.m_Parameters.m_PadBottom = padBottom;
				2244	data.m_Parameters.m_BiasEnabled = biasEnabled;
				2245	data.m_Parameters.m_DataLayout = layout;
				2246	data.m_Parameters.m_DilationX = dilationX;
				2247	data.m_Parameters.m_DilationY = dilationY;
				2248
				2249	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
				2250	inputHandle->Allocate();
				2251	outputHandle->Allocate();
				2252
				2253	CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
				2254
				2255	ExecuteWorkload(*workload, memoryManager);
				2256
				2257	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				2258
				2259	return ret;
				2260	}
				2261
				2262	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				2263	typename T = armnn::ResolveType<ArmnnType>>
				2264	LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
				2265	armnn::IWorkloadFactory& workloadFactory,
				2266	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2267	float qScale,
				2268	int32_t qOffset,
				2269	bool biasEnabled,
				2270	const armnn::DataLayout layout)
				2271	{
				2272	// Use a single-batch 2-channel 5x5 image as input.
				2273	armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
				2274	auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2275	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2276	0, 1, 2, 3, 4,
				2277	5, 6, 7, 8, 9,
				2278	10, 11, 12, 13, 14,
				2279	15, 16, 17, 18, 19,
				2280	20, 21, 22, 23, 24,
				2281
				2282	25, 26, 27, 28, 29,
				2283	30, 31, 32, 33, 34,
				2284	35, 36, 37, 38, 39,
				2285	40, 41, 42, 43, 44,
				2286	45, 46, 47, 48, 49
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2287	},
				2288	inputTensorInfo.GetQuantizationScale(),
				2289	inputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2290
				2291	// Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
				2292	armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
				2293	auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2294	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2295	32, 31, 30, 29,
				2296	28, 27, 26, 25,
				2297	24, 23, 22, 21,
				2298	20, 19, 18, 17,
				2299
				2300	16, 15, 14, 13,
				2301	12, 11, 10, 9,
				2302	8, 7, 6, 5,
				2303	4, 3, 2, 1
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2304	},
				2305	kernelTensorInfo.GetQuantizationScale(),
				2306	kernelTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2307
				2308	// Expected output is 1 batch of a 2-channel 5x5 image.
				2309	// Calculated using the python tensorflow library with strideX=1, strideY=1.
				2310	armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
				2311	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2312	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2313	1062, 1580, 1850, 1530, 1117,
				2314	2140, 3108, 3500, 2842, 2042,
				2315	3580, 5068, 5460, 4342, 3062,
				2316	3618, 5072, 5390, 4248, 2971,
				2317	3074, 4282, 4510, 3533, 2457,
				2318
				2319	1550, 2284, 2362, 1955, 1428,
				2320	2910, 4206, 4342, 3528, 2536,
				2321	3390, 4886, 5022, 4068, 2916,
				2322	3566, 5056, 5182, 4133, 2922,
				2323	3100, 4352, 4452, 3517, 2465
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2324	},
				2325	outputTensorInfo.GetQuantizationScale(),
				2326	outputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2327
				2328	return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
				2329	workloadFactory,
				2330	memoryManager,
				2331	input,
				2332	kernel,
				2333	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				2334	expectedOutput,
				2335	qScale,
				2336	qOffset,
				2337	layout,
				2338	1, // Padding left.
				2339	1, // Padding top.
				2340	2, // Padding right.
				2341	2, // Padding bottom.
				2342	1, // strideX
				2343	1); // strideY
				2344	}
				2345
				2346	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				2347	typename T = armnn::ResolveType<ArmnnType>>
				2348	LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
				2349	armnn::IWorkloadFactory& workloadFactory,
				2350	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2351	float qScale,
				2352	int32_t qOffset,
				2353	bool biasEnabled)
				2354	{
				2355	auto layout = armnn::DataLayout::NHWC;
				2356
				2357	armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
				2358	auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2359	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2360	0, 1, 2, 3, 4,
				2361	5, 6, 7, 8, 9,
				2362	10, 11, 12, 13, 14,
				2363	15, 16, 17, 18, 19,
				2364	20, 21, 22, 23, 24,
				2365
				2366	25, 26, 27, 28, 29,
				2367	30, 31, 32, 33, 34,
				2368	35, 36, 37, 38, 39,
				2369	40, 41, 42, 43, 44,
				2370	45, 46, 47, 48, 49
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2371	},
				2372	inputTensorInfo.GetQuantizationScale(),
				2373	inputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2374
				2375	armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
				2376	auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2377	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2378	32, 31, 30, 29,
				2379	28, 27, 26, 25,
				2380	24, 23, 22, 21,
				2381	20, 19, 18, 17,
				2382
				2383	16, 15, 14, 13,
				2384	12, 11, 10, 9,
				2385	8, 7, 6, 5,
				2386	4, 3, 2, 1
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2387	},
				2388	kernelTensorInfo.GetQuantizationScale(),
				2389	kernelTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2390
				2391	armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
				2392	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2393	QuantizedVector<T>({
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2394	1062, 1580, 1850, 1530, 1117,
				2395	2140, 3108, 3500, 2842, 2042,
				2396	3580, 5068, 5460, 4342, 3062,
				2397	3618, 5072, 5390, 4248, 2971,
				2398	3074, 4282, 4510, 3533, 2457,
				2399
				2400	1550, 2284, 2362, 1955, 1428,
				2401	2910, 4206, 4342, 3528, 2536,
				2402	3390, 4886, 5022, 4068, 2916,
				2403	3566, 5056, 5182, 4133, 2922,
				2404	3100, 4352, 4452, 3517, 2465
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2405	},
				2406	outputTensorInfo.GetQuantizationScale(),
				2407	outputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2408
				2409	return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				2410	workloadFactory,
				2411	memoryManager,
				2412	input,
				2413	kernel,
				2414	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				2415	expectedOutput,
				2416	qScale,
				2417	qOffset,
				2418	layout,
				2419	1, // Padding left.
				2420	1, // Padding top.
				2421	2, // Padding right.
				2422	2, // Padding bottom.
				2423	1, // strideX
				2424	1); // strideY
				2425	}
				2426
				2427	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
				2428	typename T = armnn::ResolveType<ArmnnType>>
				2429	LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
				2430	armnn::IWorkloadFactory& workloadFactory,
				2431	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2432	float qScale,
				2433	int32_t qOffset,
				2434	bool biasEnabled)
				2435	{
				2436	auto layout = armnn::DataLayout::NHWC;
				2437
				2438	armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
				2439	auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2440	QuantizedVector<T>({
				2441	0, 0, 0, 0, 0, 0, 0, 0, 0,
				2442	0, 0, 0, 0, 0, 0, 0, 0, 0,
				2443	0, 0, 0, 0, 0, 0, 0, 0, 0,
				2444	0, 0, 0, 1, 1, 1, 0, 0, 0,
				2445	0, 0, 0, 1, 1, 1, 0, 0, 0,
				2446	0, 0, 0, 1, 1, 1, 0, 0, 0,
				2447	0, 0, 0, 0, 0, 0, 0, 0, 0,
				2448	0, 0, 0, 0, 0, 0, 0, 0, 0,
				2449	0, 0, 0, 0, 0, 0, 0, 0, 0
				2450	},
				2451	inputTensorInfo.GetQuantizationScale(),
				2452	inputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2453
				2454	armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
				2455	auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2456	QuantizedVector<T>({
				2457	1, 2, 3,
				2458	4, 5, 6,
				2459	7, 8, 9
				2460	},
				2461	kernelTensorInfo.GetQuantizationScale(),
				2462	kernelTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2463
				2464	uint32_t padLeft = 0;
				2465	uint32_t padTop = 0;
				2466	uint32_t padRight = 0;
				2467	uint32_t padBottom = 0;
				2468	uint32_t strideX = 1;
				2469	uint32_t strideY = 1;
				2470	uint32_t dilationX = 3;
				2471	uint32_t dilationY = 3;
				2472
				2473	// Since the dilation rate is 3 this will reduce the size of the output from 9x9 to 3x3 of all 5s.
				2474	armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
				2475	boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2476	QuantizedVector<T>({
				2477	5, 5, 5,
				2478	5, 5, 5,
				2479	5, 5, 5
				2480	},
				2481	outputTensorInfo.GetQuantizationScale(),
				2482	outputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2483
				2484	return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				2485	workloadFactory,
				2486	memoryManager,
				2487	input,
				2488	kernel,
				2489	GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
				2490	expectedOutput,
				2491	qScale,
				2492	qOffset,
				2493	layout,
				2494	padLeft,
				2495	padTop,
				2496	padRight,
				2497	padBottom,
				2498	strideX,
				2499	strideY,
				2500	dilationX,
				2501	dilationY);
				2502	}
				2503
				2504	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
				2505	LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
				2506	armnn::IWorkloadFactory& workloadFactory,
				2507	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2508	const std::vector<float>& inputNoQuantizedValues,
				2509	armnn::TensorInfo& inputTensorInfo,
				2510	const std::vector<float>& kernelNoQuantizedValues,
				2511	armnn::TensorInfo& kernelTensorInfo,
				2512	const std::vector<float>& outputExpectedNoQuantizedValues,
				2513	armnn::TensorInfo& outputTensorInfo,
				2514	uint32_t dilationX,
				2515	uint32_t dilationY,
				2516	armnn::DataLayout layout = armnn::DataLayout::NCHW,
				2517	bool biasEnabled = false)
				2518	{
				2519	float qScale;
				2520	int32_t qOffset;
				2521	switch (ArmnnType)
				2522	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	2523	case armnn::DataType::QAsymmU8:
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2524	{
				2525	qScale = 0.1f;
				2526	qOffset = 128;
				2527	break;
				2528	}
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	2529	case armnn::DataType::QSymmS16:
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2530	{
				2531	qScale = 0.1f;
				2532	qOffset = 0;
				2533	break;
				2534	}
				2535	case armnn::DataType::Float32:
				2536	default:
				2537	{
				2538	qScale = 0.f;
				2539	qOffset = 0;
				2540	break;
				2541	}
				2542	}
				2543
				2544	inputTensorInfo.SetQuantizationScale(qScale);
				2545	inputTensorInfo.SetQuantizationOffset(qOffset);
				2546	kernelTensorInfo.SetQuantizationScale(qScale);
				2547	kernelTensorInfo.SetQuantizationOffset(qOffset);
				2548	outputTensorInfo.SetQuantizationScale(qScale);
				2549	outputTensorInfo.SetQuantizationOffset(qOffset);
				2550
				2551	auto input = MakeTensor<T, 4>(inputTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2552	std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
				2553	inputTensorInfo.GetQuantizationScale(),
				2554	inputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2555	auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
Aron Virginas-Tar	48623a0	2019-10-22 10:00:28 +0100	[diff] [blame]	2556	std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
				2557	kernelTensorInfo.GetQuantizationScale(),
				2558	kernelTensorInfo.GetQuantizationOffset())));
				2559	auto expectedOutput =
				2560	MakeTensor<T, 4>(outputTensorInfo,
				2561	std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
				2562	outputTensorInfo.GetQuantizationScale(),
				2563	outputTensorInfo.GetQuantizationOffset())));
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	2564
				2565	uint32_t padLeft = 0;
				2566	uint32_t padTop = 0;
				2567	uint32_t padRight = 0;
				2568	uint32_t padBottom = 0;
				2569	uint32_t strideX = 1;
				2570	uint32_t strideY = 1;
				2571
				2572	return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
				2573	workloadFactory,
				2574	memoryManager,
				2575	input,
				2576	kernel,
				2577	GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
				2578	expectedOutput,
				2579	qScale,
				2580	qOffset,
				2581	layout,
				2582	padLeft,
				2583	padTop,
				2584	padRight,
				2585	padBottom,
				2586	strideX,
				2587	strideY,
				2588	dilationX,
				2589	dilationY);
				2590	}
				2591
				2592	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				2593	LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
				2594	armnn::IWorkloadFactory& workloadFactory,
				2595	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2596	bool biasEnabled,
				2597	const armnn::DataLayout layout)
				2598	{
				2599	armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
				2600	std::vector<float> inputNoQuantizedValues =
				2601	{
				2602	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2603	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2604	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2605	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2606	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2607	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2608	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2609	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2610	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2611	0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				2612	};
				2613
				2614	armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
				2615	std::vector<float> kernelNoQuantizedValues =
				2616	{
				2617	1, 2, 3,
				2618	4, 5, 6,
				2619	7, 8, 9
				2620	};
				2621
				2622	// Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
				2623	// therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
				2624	armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
				2625	std::vector<float> outputExpectedNoQuantizedValues =
				2626	{
				2627	6., 5., 5., 5.,
				2628	6., 5., 5., 5.,
				2629	6., 5., 5., 5.,
				2630	3., 2., 2., 2.
				2631	};
				2632
				2633	return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				2634	workloadFactory,
				2635	memoryManager,
				2636	inputNoQuantizedValues,
				2637	inputTensorInfo,
				2638	kernelNoQuantizedValues,
				2639	kernelTensorInfo,
				2640	outputExpectedNoQuantizedValues,
				2641	outputTensorInfo,
				2642	3,
				2643	3,
				2644	layout,
				2645	biasEnabled);
				2646	}
				2647
				2648	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				2649	LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
				2650	armnn::IWorkloadFactory& workloadFactory,
				2651	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2652	bool biasEnabled,
				2653	const armnn::DataLayout layout)
				2654	{
				2655	armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
				2656	std::vector<float> inputNoQuantizedValues =
				2657	{
				2658	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2659	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2660	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2661	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2662	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2663	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2664	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2665	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2666	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2667	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2668
				2669	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2670	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2671	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2672	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2673	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2674	0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
				2675	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2676	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2677	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				2678	0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				2679	};
				2680
				2681	armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
				2682	std::vector<float> kernelNoQuantizedValues =
				2683	{
				2684	1, 2, 3,
				2685	4, 5, 6,
				2686	7, 8, 9,
				2687
				2688	1, 2, 3,
				2689	4, 5, 6,
				2690	7, 8, 9
				2691	};
				2692
				2693	// Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
				2694	// therefore the output will be 2x4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
				2695	armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
				2696	std::vector<float> outputExpectedNoQuantizedValues =
				2697	{
				2698	6., 5., 5., 5.,
				2699	6., 5., 5., 5.,
				2700	6., 5., 5., 5.,
				2701	3., 2., 2., 2.,
				2702
				2703	6., 5., 5., 5.,
				2704	6., 5., 5., 5.,
				2705	6., 5., 5., 5.,
				2706	3., 2., 2., 2.
				2707	};
				2708
				2709	return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				2710	workloadFactory,
				2711	memoryManager,
				2712	inputNoQuantizedValues,
				2713	inputTensorInfo,
				2714	kernelNoQuantizedValues,
				2715	kernelTensorInfo,
				2716	outputExpectedNoQuantizedValues,
				2717	outputTensorInfo,
				2718	3,
				2719	3,
				2720	layout,
				2721	biasEnabled);
				2722	}
				2723
				2724	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				2725	LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
				2726	armnn::IWorkloadFactory& workloadFactory,
				2727	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2728	bool biasEnabled,
				2729	const armnn::DataLayout layout)
				2730	{
				2731	armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
				2732	std::vector<float> inputNoQuantizedValues =
				2733	{
				2734	10.0, 10.0, 10.0,
				2735	10.0, 10.0, 10.0,
				2736	10.0, 10.0, 10.0,
				2737
				2738	21.0, 22.0, 23.0,
				2739	24.0, 25.0, 26.0,
				2740	27.0, 28.0, 29.0
				2741	};
				2742
				2743	armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
				2744
				2745	std::vector<float> kernelNoQuantizedValues =
				2746	{
				2747	0.25f, 0.25f,
				2748	0.25f, 0.25f,
				2749
				2750	0.25f, 0.25f,
				2751	0.25f, 0.25f,
				2752
				2753	0.0f , 0.0f,
				2754	0.0f , 0.1f,
				2755
				2756	0.0f , 0.0f,
				2757	0.0f , 0.1f,
				2758
				2759	0.2f , 0.0f,
				2760	0.0f , 0.0f,
				2761
				2762	0.2f , 0.0f,
				2763	0.0f , 0.0f,
				2764
				2765	0.0f , 0.3f,
				2766	0.0f , 0.0f,
				2767
				2768	0.0f , 0.3f,
				2769	0.0f , 0.0f
				2770	};
				2771
				2772	armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
				2773	std::vector<float> outputExpectedNoQuantizedValues =
				2774	{
				2775	10.f, 10.f,
				2776	10.f, 10.f,
				2777
				2778	1.f, 1.f,
				2779	1.f, 1.f,
				2780
				2781	2.f, 2.f,
				2782	2.f, 2.f,
				2783
				2784	3.f, 3.f,
				2785	3.f, 3.f,
				2786
				2787	23.f, 24.f,
				2788	26.f, 27.f,
				2789
				2790	2.5f, 2.6000001f,
				2791	2.8f, 2.9f,
				2792
				2793	4.2000003f, 4.4f,
				2794	4.8f, 5.f,
				2795
				2796	6.6000004f, 6.9f,
				2797	7.5000005f, 7.8f
				2798	};
				2799
				2800
				2801	return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				2802	workloadFactory,
				2803	memoryManager,
				2804	inputNoQuantizedValues,
				2805	inputTensorInfo,
				2806	kernelNoQuantizedValues,
				2807	kernelTensorInfo,
				2808	outputExpectedNoQuantizedValues,
				2809	outputTensorInfo,
				2810	1,
				2811	1,
				2812	layout,
				2813	biasEnabled);
				2814	}
				2815
				2816	template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
				2817	LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
				2818	armnn::IWorkloadFactory& workloadFactory,
				2819	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2820	bool biasEnabled,
				2821	const armnn::DataLayout layout)
				2822	{
				2823	armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
				2824	std::vector<float> inputNoQuantizedValues =
				2825	{
				2826	10.0, 10.0, 10.0,
				2827	10.0, 10.0, 10.0,
				2828	10.0, 10.0, 10.0,
				2829
				2830	21.0, 22.0, 23.0,
				2831	24.0, 25.0, 26.0,
				2832	27.0, 28.0, 29.0
				2833	};
				2834
				2835	armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
				2836
				2837	std::vector<float> kernelNoQuantizedValues =
				2838	{
				2839	0.25f, 0.25f,
				2840	0.25f, 0.25f,
				2841
				2842	0.2f , 0.0f,
				2843	0.0f , 0.0f,
				2844
				2845	0.0f , 0.0f,
				2846	0.0f , 0.1f,
				2847
				2848	0.0f , 0.3f,
				2849	0.0f , 0.0f
				2850
				2851	};
				2852
				2853	armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
				2854	std::vector<float> outputExpectedNoQuantizedValues =
				2855	{
				2856	10.f, 10.f,
				2857	10.f, 10.f,
				2858
				2859	1.f, 1.f,
				2860	1.f, 1.f,
				2861
				2862	4.2000003f, 4.4f,
				2863	4.8f, 5.f,
				2864
				2865	6.6000004f, 6.9f,
				2866	7.5000005f, 7.8f
				2867	};
				2868
				2869
				2870	return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
				2871	workloadFactory,
				2872	memoryManager,
				2873	inputNoQuantizedValues,
				2874	inputTensorInfo,
				2875	kernelNoQuantizedValues,
				2876	kernelTensorInfo,
				2877	outputExpectedNoQuantizedValues,
				2878	outputTensorInfo,
				2879	1,
				2880	1,
				2881	layout,
				2882	biasEnabled);
				2883	}
				2884
				2885	template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
				2886	LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
				2887	armnn::IWorkloadFactory& workloadFactory,
				2888	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				2889	armnn::IWorkloadFactory& refWorkloadFactory,
				2890	const armnnUtils::DataLayoutIndexed& layout)
				2891	{
				2892	unsigned int inputHeight = 8;
				2893	unsigned int inputWidth = 16;
				2894	unsigned int inputChannels = 3;
				2895	unsigned int inputNum = 5;
				2896
				2897	unsigned int kernelHeight = 3;
				2898	unsigned int kernelWidth = 3;
				2899	unsigned int channelMultiplier = 1;
				2900
				2901	unsigned int strideX = 2;
				2902	unsigned int strideY = 3;
				2903	unsigned int padX = 1;
				2904	unsigned int padY = 1;
				2905
				2906	unsigned int outputNum = inputNum;
				2907	unsigned int outputChannels = inputChannels * channelMultiplier;
				2908	unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
				2909	unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
				2910
				2911	armnn::TensorInfo inputTensorInfo;
				2912	armnn::TensorInfo outputTensorInfo;
				2913	armnn::TensorInfo kernelDesc;
				2914	armnn::TensorInfo biasDesc;
				2915
				2916
				2917	std::vector<unsigned int> inputShape;
				2918	std::vector<unsigned int> outputShape;
				2919	std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
				2920	std::vector<unsigned int> biasShape{ outputChannels };
				2921	switch (layout.GetDataLayout())
				2922	{
				2923	case armnn::DataLayout::NCHW:
				2924	inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
				2925	outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
				2926	break;
				2927	case armnn::DataLayout ::NHWC:
				2928	inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
				2929	outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
				2930	break;
				2931	default:
				2932	throw armnn::InvalidArgumentException("unknown data layout ["
				2933	+ std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
				2934	}
				2935
				2936	float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
				2937	float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
				2938	int32_t qOffset = 0;
				2939
				2940	inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
				2941	outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
				2942	kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
				2943	biasDesc = armnn::TensorInfo(
				2944	1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
				2945
				2946	LayerTestResult<T, 4> ret(outputTensorInfo);
				2947
				2948	auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
				2949	auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
				2950	auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
				2951	biasDesc, 1028, 0.0f, 255.0f);
				2952
				2953	std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
				2954	std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
				2955
				2956	armnn::DepthwiseConvolution2dQueueDescriptor data;
				2957	armnn::WorkloadInfo info;
				2958	armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
				2959	armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
				2960
				2961	AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
				2962	AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
				2963
				2964	AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
				2965	AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
				2966	data.m_Weight = &weightsTensor;
				2967	data.m_Bias = &biasTensor;
				2968	data.m_Parameters.m_StrideX = strideX;
				2969	data.m_Parameters.m_StrideY = strideY;
				2970	data.m_Parameters.m_PadLeft = padX;
				2971	data.m_Parameters.m_PadRight = padX;
				2972	data.m_Parameters.m_PadTop = padY;
				2973	data.m_Parameters.m_PadBottom = padY;
				2974	data.m_Parameters.m_BiasEnabled = true;
				2975	data.m_Parameters.m_DataLayout = layout.GetDataLayout();
				2976
				2977	std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
				2978	std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
				2979
				2980	armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
				2981	armnn::WorkloadInfo refInfo = info;
				2982	SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
				2983	SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
				2984
				2985	std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
				2986	std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
				2987
				2988	outputHandleRef->Allocate();
				2989	inputHandleRef->Allocate();
				2990
				2991	inputHandle->Allocate();
				2992	outputHandle->Allocate();
				2993
				2994	CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
				2995	CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
				2996
				2997	ExecuteWorkload(*workload, memoryManager);
				2998
				2999	workloadRef->PostAllocationConfigure();
				3000	workloadRef->Execute();
				3001
				3002	CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
				3003	CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
				3004
				3005	return ret;
				3006	}
				3007
//
// Explicit template instantiations
//
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3011	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3012	Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3013	armnn::IWorkloadFactory&,
				3014	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3015	bool,
				3016	armnn::DataLayout);
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3017
				3018	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3019	Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3020	armnn::IWorkloadFactory&,
				3021	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3022	bool,
				3023	armnn::DataLayout);
				3024
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3025	template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
				3026	Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3027	armnn::IWorkloadFactory&,
				3028	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3029	bool,
				3030	armnn::DataLayout);
				3031
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3032	template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
				3033	Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3034	armnn::IWorkloadFactory&,
				3035	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3036	bool,
				3037	armnn::DataLayout);
				3038
				3039	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3040	Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3041	armnn::IWorkloadFactory&,
				3042	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3043	bool,
				3044	armnn::DataLayout);
				3045
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3046	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3047	Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3048	armnn::IWorkloadFactory&,
				3049	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3050	bool,
				3051	armnn::DataLayout);
				3052
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3053	template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
				3054	Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3055	armnn::IWorkloadFactory&,
				3056	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3057	bool,
				3058	armnn::DataLayout);
				3059
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3060	template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
				3061	Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3062	armnn::IWorkloadFactory&,
				3063	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3064	bool,
				3065	armnn::DataLayout);
				3066
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3067	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3068	Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3069	armnn::IWorkloadFactory &workloadFactory,
				3070	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3071	bool biasEnabled,
				3072	const armnn::DataLayout layout);
				3073
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3074	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3075	Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3076	armnn::IWorkloadFactory &workloadFactory,
				3077	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3078	bool biasEnabled,
				3079	const armnn::DataLayout layout);
				3080
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3081	template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
				3082	Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3083	armnn::IWorkloadFactory &workloadFactory,
				3084	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3085	bool biasEnabled,
				3086	const armnn::DataLayout layout);
				3087
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3088	template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
				3089	Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3090	armnn::IWorkloadFactory &workloadFactory,
				3091	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3092	bool biasEnabled,
				3093	const armnn::DataLayout layout);
				3094
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3095	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3096	DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3097	armnn::IWorkloadFactory&,
				3098	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3099	bool,
				3100	armnn::DataLayout);
				3101
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3102	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3103	DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3104	armnn::IWorkloadFactory&,
				3105	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3106	bool,
				3107	armnn::DataLayout);
				3108
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3109	template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
				3110	DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3111	armnn::IWorkloadFactory&,
				3112	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3113	bool,
				3114	armnn::DataLayout);
				3115
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3116	template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
				3117	DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3118	armnn::IWorkloadFactory&,
				3119	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3120	bool,
				3121	armnn::DataLayout);
				3122
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3123	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3124	DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3125	armnn::IWorkloadFactory&,
				3126	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3127	bool,
				3128	armnn::DataLayout);
				3129
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3130	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3131	DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3132	armnn::IWorkloadFactory&,
				3133	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3134	bool,
				3135	armnn::DataLayout);
				3136
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3137	template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
				3138	DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3139	armnn::IWorkloadFactory&,
				3140	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3141	bool,
				3142	armnn::DataLayout);
				3143
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3144	template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
				3145	DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3146	armnn::IWorkloadFactory&,
				3147	const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
				3148	bool,
				3149	armnn::DataLayout);
				3150
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3151	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3152	DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3153	armnn::IWorkloadFactory &workloadFactory,
				3154	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3155	bool biasEnabled,
				3156	const armnn::DataLayout layout);
				3157
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3158	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3159	DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3160	armnn::IWorkloadFactory &workloadFactory,
				3161	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3162	bool biasEnabled,
				3163	const armnn::DataLayout layout);
				3164
Narumol Prangnawarat	44179c3	2020-03-11 14:51:27 +0000	[diff] [blame]	3165	template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
				3166	DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
				3167	armnn::IWorkloadFactory &workloadFactory,
				3168	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3169	bool biasEnabled,
				3170	const armnn::DataLayout layout);
				3171
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3172	template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
				3173	DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
				3174	armnn::IWorkloadFactory &workloadFactory,
				3175	const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
				3176	bool biasEnabled,
				3177	const armnn::DataLayout layout);
				3178
				3179	//
				3180	// Implementation functions
				3181	//
				3182
				3183	LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
				3184	armnn::IWorkloadFactory& workloadFactory,
				3185	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3186	bool biasEnabled,
				3187	const armnn::DataLayout layout)
				3188	{
				3189	return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3190	workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
				3191	}
				3192
				3193	LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
				3194	armnn::IWorkloadFactory& workloadFactory,
				3195	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3196	bool biasEnabled,
				3197	const armnn::DataLayout layout)
				3198	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3199	return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3200	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3201	}
				3202
				3203	LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
				3204	armnn::IWorkloadFactory& workloadFactory,
				3205	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3206	bool biasEnabled,
				3207	const armnn::DataLayout layout)
				3208	{
				3209	return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3210	workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
				3211	}
				3212
				3213	LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
				3214	armnn::IWorkloadFactory& workloadFactory,
				3215	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3216	bool biasEnabled)
				3217	{
				3218	return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
				3219	workloadFactory,
				3220	memoryManager,
				3221	0.f,
				3222	0,
				3223	biasEnabled,
				3224	armnn::DataLayout::NHWC);
				3225	}
				3226
				3227	LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
				3228	armnn::IWorkloadFactory& workloadFactory,
				3229	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3230	bool biasEnabled,
				3231	const armnn::DataLayout layout)
				3232	{
				3233	return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
				3234	workloadFactory,
				3235	memoryManager,
				3236	0.f,
				3237	0,
				3238	biasEnabled,
				3239	layout);
				3240	}
				3241
				3242	LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
				3243	armnn::IWorkloadFactory& workloadFactory,
				3244	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3245	bool biasEnabled,
				3246	const armnn::DataLayout layout)
				3247	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3248	return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3249	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3250	}
				3251
				3252	LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
				3253	armnn::IWorkloadFactory& workloadFactory,
				3254	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3255	bool biasEnabled,
				3256	const armnn::DataLayout layout)
				3257	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3258	return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3259	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3260	}
				3261
				3262	LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
				3263	armnn::IWorkloadFactory& workloadFactory,
				3264	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3265	bool biasEnabled,
				3266	const armnn::DataLayout layout)
				3267	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3268	return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3269	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3270	}
				3271
				3272	LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
				3273	armnn::IWorkloadFactory& workloadFactory,
				3274	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3275	armnn::DataLayout layout)
				3276	{
				3277	return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3278	workloadFactory, memoryManager, layout, 0.0f, 0);
				3279	}
				3280
				3281	LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
				3282	armnn::IWorkloadFactory& workloadFactory,
				3283	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3284	armnn::DataLayout layout)
				3285	{
				3286	return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
				3287	<armnn::DataType::Float32, armnn::DataType::Float32>(
				3288	workloadFactory, memoryManager, layout, 0.0f, 0);
				3289	}
				3290
				3291	LayerTestResult<float, 4> Convolution1dTest(
				3292	armnn::IWorkloadFactory& workloadFactory,
				3293	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3294	bool biasEnabled)
				3295	{
				3296	return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
				3297	workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
				3298	}
				3299
				3300	LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
				3301	armnn::IWorkloadFactory& workloadFactory,
				3302	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3303	bool biasEnabled)
				3304	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3305	return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3306	workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
				3307	}
				3308
Aron Virginas-Tar	5edc881	2019-11-05 18:00:21 +0000	[diff] [blame]	3309	LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
				3310	armnn::IWorkloadFactory& workloadFactory,
				3311	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3312	const armnn::DataLayout layout)
				3313	{
				3314	using namespace armnn;
				3315
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3316	const DataType inputType = DataType::QAsymmU8;
Derek Lamberti	d466a54	2020-01-22 15:37:29 +0000	[diff] [blame]	3317	const DataType kernelType = DataType::QSymmS8;
Aron Virginas-Tar	5edc881	2019-11-05 18:00:21 +0000	[diff] [blame]	3318	const DataType biasType = DataType::Signed32;
				3319
				3320	TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
				3321	TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
				3322
				3323	const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
				3324	constexpr unsigned int quantDimension = 0;
				3325
				3326	TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
				3327
				3328	const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
				3329	TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
				3330
				3331	std::vector<uint8_t> inputData =
				3332	{
				3333	138, 108, 138, 108, 138, 108
				3334	};
				3335
				3336	std::vector<int8_t> kernelData =
				3337	{
				3338	1, 2, 1, 2, 1, 2
				3339	};
				3340
				3341	std::vector<int32_t> biasData =
				3342	{
				3343	4, 4, 4
				3344	};
				3345
				3346	std::vector<uint8_t> expectedOutputData =
				3347	{
				3348	121, 118, 115, 121, 118, 115, 121, 118, 115
				3349	};
				3350
				3351	if (layout == DataLayout::NCHW)
				3352	{
				3353	PermuteTensorNhwcToNchw(inputInfo, inputData);
				3354	PermuteTensorNhwcToNchw(kernelInfo, kernelData);
				3355	PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
				3356	}
				3357
				3358	Convolution2dDescriptor descriptor;
				3359	descriptor.m_StrideX = 1;
				3360	descriptor.m_StrideY = 1;
				3361	descriptor.m_PadLeft = 0;
				3362	descriptor.m_PadRight = 0;
				3363	descriptor.m_PadTop = 0;
				3364	descriptor.m_PadBottom = 0;
				3365	descriptor.m_BiasEnabled = true;
				3366	descriptor.m_DataLayout = layout;
				3367
				3368	std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
				3369	std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
				3370
				3371	WorkloadInfo workloadInfo;
				3372	ScopedCpuTensorHandle weightTensor(kernelInfo);
				3373	ScopedCpuTensorHandle biasTensor(biasInfo);
				3374
				3375	AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
				3376	AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
				3377
				3378	Convolution2dQueueDescriptor queueDescriptor;
				3379	queueDescriptor.m_Parameters = descriptor;
				3380	queueDescriptor.m_Weight = &weightTensor;
				3381	queueDescriptor.m_Bias = &biasTensor;
				3382
				3383	AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
				3384	AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
				3385
				3386	std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
				3387	inputHandle->Allocate();
				3388	outputHandle->Allocate();
				3389
				3390	CopyDataToITensorHandle(inputHandle.get(), inputData.data());
				3391
				3392	ExecuteWorkload(*workload, memoryManager);
				3393
				3394	LayerTestResult<uint8_t, 4> ret(outputInfo);
				3395	CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
				3396	ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
				3397
				3398	return ret;
				3399	}
				3400
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3401	LayerTestResult<float,4> CompareConvolution2dTest(
				3402	armnn::IWorkloadFactory& workloadFactory,
				3403	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3404	armnn::IWorkloadFactory& refWorkloadFactory)
				3405	{
				3406	return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
				3407	workloadFactory, memoryManager, refWorkloadFactory);
				3408	}
				3409
				3410	LayerTestResult<float, 4> DepthwiseConvolution2dTest(
				3411	armnn::IWorkloadFactory& workloadFactory,
				3412	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3413	bool biasEnabled,
				3414	const armnn::DataLayout layout)
				3415	{
				3416	return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
				3417	workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
				3418	}
				3419
				3420	LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
				3421	armnn::IWorkloadFactory& workloadFactory,
				3422	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3423	bool biasEnabled)
				3424	{
				3425	return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3426	workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
				3427	}
				3428
				3429	LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
				3430	armnn::IWorkloadFactory& workloadFactory,
				3431	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3432	bool biasEnabled,
				3433	const armnn::DataLayout layout)
				3434	{
				3435	return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
				3436	workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
				3437	}
				3438
				3439	LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
				3440	armnn::IWorkloadFactory& workloadFactory,
				3441	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
				3442	{
				3443	armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
				3444	auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
				3445
				3446	std::vector<float> kernelData;
				3447	std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
				3448	for (unsigned int i = 0; i < 64; ++i)
				3449	{
				3450	kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
				3451	}
				3452	armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
				3453	auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
				3454
				3455	std::vector<float> expectedOutputData(64, 0.f);
				3456	armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
				3457	auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
				3458
				3459	return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
				3460	workloadFactory,
				3461	memoryManager,
				3462	input,
				3463	kernel,
				3464	boost::multi_array<float, 1>(),
				3465	expectedOutput,
				3466	0.f,
				3467	0,
				3468	armnn::DataLayout::NCHW);
				3469	}
				3470
				3471	LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
				3472	armnn::IWorkloadFactory& workloadFactory,
				3473	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3474	bool biasEnabled,
				3475	const armnn::DataLayout layout)
				3476	{
				3477	return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3478	workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
				3479	}
				3480
				3481	LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
				3482	armnn::IWorkloadFactory& workloadFactory,
				3483	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3484	bool biasEnabled,
				3485	const armnn::DataLayout layout)
				3486	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3487	return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3488	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3489	}
				3490
				3491	LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
				3492	armnn::IWorkloadFactory& workloadFactory,
				3493	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3494	bool biasEnabled,
				3495	const armnn::DataLayout layout)
				3496	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3497	return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3498	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3499	}
				3500
				3501	LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
				3502	armnn::IWorkloadFactory& workloadFactory,
				3503	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
				3504	{
				3505	return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
				3506	workloadFactory,
				3507	memoryManager,
				3508	0.f,
				3509	0,
				3510	false);
				3511	}
				3512
				3513	LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
				3514	armnn::IWorkloadFactory& workloadFactory,
				3515	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3516	bool biasEnabled,
				3517	const armnn::DataLayout layout)
				3518	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3519	return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3520	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3521	}
				3522
				3523	LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
				3524	armnn::IWorkloadFactory& workloadFactory,
				3525	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3526	bool biasEnabled,
				3527	const armnn::DataLayout layout)
				3528	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3529	return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3530	workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
				3531	}
				3532
Teresa Charlin	d8df026	2019-11-11 12:28:15 +0000	[diff] [blame]	3533	LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
				3534	armnn::IWorkloadFactory& workloadFactory,
				3535	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3536	const armnn::DataLayout layout)
				3537	{
				3538	using namespace armnn;
				3539
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3540	const DataType inputType = DataType::QAsymmU8;
Derek Lamberti	d466a54	2020-01-22 15:37:29 +0000	[diff] [blame]	3541	const DataType kernelType = DataType::QSymmS8;
Teresa Charlin	d8df026	2019-11-11 12:28:15 +0000	[diff] [blame]	3542	const DataType biasType = DataType::Signed32;
				3543
				3544	TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
				3545	TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
				3546
				3547	const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
				3548	const unsigned int quantDimension = 0;
				3549	TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
				3550
				3551	const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
				3552	constexpr unsigned int biasQuantDimension = 0;
				3553	TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
				3554
				3555	std::vector<uint8_t> inputData =
				3556	{
				3557	129, 130,
				3558	129, 130,
				3559	129, 130,
				3560	129, 130,
				3561	129, 130,
				3562	129, 130,
				3563	129, 130,
				3564	129, 130,
				3565	129, 130
				3566	};
				3567
				3568	std::vector<int8_t> kernelData =
				3569	{
				3570	1, 1, 1, 1,
				3571	1, 1, 1, 1,
				3572	1, 1, 1, 1,
				3573	1, 1, 1, 1
				3574	};
				3575
				3576	std::vector<int32_t> biasData =
				3577	{
				3578	4, 4, 4, 4
				3579	};
				3580
				3581	std::vector<uint8_t> expectedOutputData =
				3582	{
				3583	132, 130, 134, 131,
				3584	132, 130, 134, 131,
				3585	132, 130, 134, 131,
				3586	132, 130, 134, 131
				3587	};
				3588
				3589	if (layout == DataLayout::NCHW)
				3590	{
				3591	PermuteTensorNhwcToNchw(inputInfo, inputData);
				3592	PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
				3593	}
				3594
				3595	DepthwiseConvolution2dDescriptor descriptor;
				3596	descriptor.m_StrideX = 1;
				3597	descriptor.m_StrideY = 1;
				3598	descriptor.m_PadLeft = 0;
				3599	descriptor.m_PadRight = 0;
				3600	descriptor.m_PadTop = 0;
				3601	descriptor.m_PadBottom = 0;
				3602	descriptor.m_DilationX = 1;
				3603	descriptor.m_DilationY = 1;
				3604	descriptor.m_BiasEnabled = true;
				3605	descriptor.m_DataLayout = layout;
				3606
				3607	std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
				3608	std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
				3609
				3610	WorkloadInfo workloadInfo;
				3611	ScopedCpuTensorHandle weightTensor(kernelInfo);
				3612	ScopedCpuTensorHandle biasTensor(biasInfo);
				3613
				3614	AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
				3615	AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
				3616
				3617	DepthwiseConvolution2dQueueDescriptor queueDescriptor;
				3618	queueDescriptor.m_Parameters = descriptor;
				3619	queueDescriptor.m_Weight = &weightTensor;
				3620	queueDescriptor.m_Bias = &biasTensor;
				3621
				3622	AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
				3623	AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
				3624
				3625	std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
				3626	inputHandle->Allocate();
				3627	outputHandle->Allocate();
				3628
				3629	CopyDataToITensorHandle(inputHandle.get(), inputData.data());
				3630
				3631	ExecuteWorkload(*workload, memoryManager);
				3632
				3633	LayerTestResult<uint8_t, 4> ret(outputInfo);
				3634
				3635	CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
				3636	ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
				3637
				3638	return ret;
				3639	}
				3640
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3641	LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
				3642	armnn::IWorkloadFactory& workloadFactory,
				3643	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3644	armnn::IWorkloadFactory& refWorkloadFactory,
				3645	const armnn::DataLayout layout)
				3646	{
				3647	return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
				3648	workloadFactory, memoryManager, refWorkloadFactory, layout);
				3649	}
				3650
				3651	LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
				3652	armnn::IWorkloadFactory& workloadFactory,
				3653	const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
				3654	armnn::IWorkloadFactory& refWorkloadFactory,
				3655	const armnn::DataLayout layout)
				3656	{
Derek Lamberti	f90c56d	2020-01-10 17:14:08 +0000	[diff] [blame]	3657	return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
Aron Virginas-Tar	00d306e	2019-08-28 18:08:46 +0100	[diff] [blame]	3658	workloadFactory, memoryManager, refWorkloadFactory, layout);
				3659	}