//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Pooling3d.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/Types.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <cmath>      // for sqrtf, used in GetExecutor
#include <limits>
#include <algorithm>
#include <functional>

namespace
{
    using PoolingAlgorithm = armnn::PoolingAlgorithm;

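    // Returns the value the accumulator starts from for a given pooling algorithm:
    // the lowest representable float for Max, zero for Average and L2.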
    float DefaultInitializer(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return std::numeric_limits<float>::lowest();
            }
            case PoolingAlgorithm::Average:
            case PoolingAlgorithm::L2:
            {
                return 0.0f;
            }
            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

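    // Per-element accumulation step: keep the maximum, sum the values, or sum the
    // squares, depending on the pooling algorithm.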
    using Accumulator = std::function<void(float & accu, float value)>;

    Accumulator GetAccumulator(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & accu, float value) {
                    if (value > accu) {
                        accu = value;
                    }
                };
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accu, float value) {
                    accu += value;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accu, float value) {
                    accu += (value*value);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

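    // Final step applied once per pooling window: nothing for Max, divide by the
    // window size for Average, square root of the mean for L2.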
    using Executor = std::function<void(float & accumulated, float kernelSize)>;

    Executor GetExecutor(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & /*accumulated*/, float /*kernelSize*/) {};
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated /= kernelSize;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated = sqrtf(accumulated / kernelSize);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

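    // Returns true if [start, end) lies entirely outside the valid input range,
    // i.e. the pooling window covers padding only.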
    bool OnPaddingOnly(int start, int end, int maxRange)
    {
        if (end <= 0 || start > maxRange)
        {
            return true;
        }
        else
        {
            return false;
        }
    }

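    // Clamps [start, end) to the valid input range. Returns true if clamping was
    // needed, i.e. the window extended into the padding.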
    bool ClampRange(int & start, int & end, int maxRange)
    {
        if (start < 0 || end > maxRange)
        {
            start = std::min(std::max(start, 0), maxRange);
            end = std::min(std::max(end, 0), maxRange);
            return true;
        }
        else
        {
            return false;
        }
    }

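    // Flattens (n, c, z, y, x) coordinates into a linear buffer index for the
    // given data layout (NDHWC or NCDHW).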
    int CalculateIndex(int channels, int depth, int height, int width,
                       int n, int c, int z, int y, int x,
                       armnnUtils::DataLayoutIndexed dataLayout)
    {
        switch (dataLayout.GetDataLayout())
        {
            case armnn::DataLayout::NDHWC:
            {
                int outputIndex = n * depth * height * width * channels +
                                  z * height * width * channels +
                                  y * width * channels +
                                  x * channels +
                                  c;
                return outputIndex;
            }
            case armnn::DataLayout::NCDHW:
            {
                int outputIndex = n * channels * depth * height * width +
                                  c * depth * height * width +
                                  z * height * width +
                                  y * width +
                                  x;
                return outputIndex;
            }
            default:
            {
                throw armnn::InvalidArgumentException("Unsupported data layout.");
            }
        }
    }
}

using namespace armnnUtils;

namespace armnn
{
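// Computes a 3D pooling operation (Max, Average or L2) over the input tensor,
// as described by the given Pooling3dDescriptor.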
void Pooling3d(Decoder<float>& rInputDecoder,
               Encoder<float>& rOutputEncoder,
               const TensorInfo& inputInfo,
               const TensorInfo& outputInfo,
               const Pooling3dDescriptor& params)
{
    const DataLayoutIndexed dataLayout(params.m_DataLayout);

    auto channelsIndex = dataLayout.GetChannelsIndex();

    auto depthIndex = dataLayout.GetDepthIndex();
    auto heightIndex = dataLayout.GetHeightIndex();
    auto widthIndex = dataLayout.GetWidthIndex();

    const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
    const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);

    const int depthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
    const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
    const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);

    const int depthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
    const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
    const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);

    const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
    const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
    const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
    const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
    const int padFront = armnn::numeric_cast<int>(params.m_PadFront);
    const int padBack = armnn::numeric_cast<int>(params.m_PadBack);

    const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
    const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
    const int strideZ = armnn::numeric_cast<int>(params.m_StrideZ);

    const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
    const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
    const int poolDepth = armnn::numeric_cast<int>(params.m_PoolDepth);

    float defaultInitializer = DefaultInitializer(params.m_PoolType);
    Accumulator accumulate = GetAccumulator(params.m_PoolType);
    Executor execute = GetExecutor(params.m_PoolType);

    // Check supported padding methods outside the loop to simplify
    // the inner loop.
    if (params.m_PaddingMethod != PaddingMethod::Exclude &&
        params.m_PaddingMethod != PaddingMethod::IgnoreValue)
    {
        throw armnn::InvalidArgumentException("Unsupported padding type");
    }

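    // Decode the whole input tensor once up front so the inner loops can index
    // into a flat vector of floats directly.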
    const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());

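    // Walk every output element: batch, channel, then output depth, height and width.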
    for (int n = 0; n < batchSize; n++)
    {
        for (int c = 0; c < channels; c++)
        {
            for (int zOutput = 0; zOutput < depthOutput; zOutput++)
            {
                // Calculate values that are independent of the x and y axes.
                int dstart = (zOutput * strideZ) - padFront;
                int dend = dstart + poolDepth;
                // Clamp the pooling region inside the valid input area (which includes the padding).
                // This is necessary because the final pooling in a row may overlap beyond the padding.
                dend = std::min(dend, depthInput + padBack);

                int depth = dend - dstart;
                bool dclamped = ClampRange(dstart, dend, depthInput);
                int depthClamped = dend - dstart;

                for (int yOutput = 0; yOutput < heightOutput; yOutput++)
                {
                    int hstart = (yOutput * strideY) - padTop;
                    int hend = hstart + poolHeight;
                    // Clamp the pooling region inside the valid input area (which includes the padding).
                    // This is necessary because the final pooling in a row may overlap beyond the padding.
                    hend = std::min(hend, heightInput + padBottom);

                    int height = hend - hstart;
                    bool hclamped = ClampRange(hstart, hend, heightInput);
                    int heightClamped = hend - hstart;

                    for (int xOutput = 0; xOutput < widthOutput; xOutput++)
                    {
                        int wstart = (xOutput * strideX) - padLeft;
                        int wend = wstart + poolWidth;
                        // Clamp the pooling region inside the valid input area (which includes the padding).
                        // This is necessary because the final pooling in a row may overlap beyond the padding.
                        wend = std::min(wend, widthInput + padRight);

                        int width = wend - wstart;
                        bool wclamped = ClampRange(wstart, wend, widthInput);
                        int widthClamped = wend - wstart;

                        float result = defaultInitializer;
                        float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);

                        // Special case: the pooling window lies entirely over padding (possible when
                        // the padding is at least as large as the kernel). There are no real input
                        // values to pool over, so by convention the result is set to zero.
                        if (OnPaddingOnly(dstart, dend, depthInput) ||
                            OnPaddingOnly(hstart, hend, heightInput) ||
                            OnPaddingOnly(wstart, wend, widthInput))
                        {
                            result = 0.0f;

                            int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                                                             n, c, zOutput, yOutput, xOutput, dataLayout);

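                            // operator[] positions the encoder at outputIndex; Set() then writes
                            // the result to that element.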
                            rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                            rOutputEncoder.Set(result);

                            continue;
                        }

                        bool clamped = (dclamped | hclamped | wclamped);

                        if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
                        {
                            // When padding is excluded, the divisor must only count the elements that
                            // overlap the real input, so use the clamped pooling window size.
                            poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
                        }

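                        // Accumulate over the (possibly clamped) pooling window.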
                        for (auto zInput = dstart; zInput < dend; zInput++)
                        {
                            for (auto yInput = hstart; yInput < hend; yInput++)
                            {
                                for (auto xInput = wstart; xInput < wend; xInput++)
                                {
                                    int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
                                                                    n, c, zInput, yInput, xInput, dataLayout);

                                    accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
                                }
                            }
                        }

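                        // Finalise the accumulated value (divide by the pool area for Average,
                        // square root of the mean for L2) and write it to the output tensor.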
                        execute(result, poolAreaSize);

                        int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                                                         n, c, zOutput, yOutput, xOutput, dataLayout);

                        rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                        rOutputEncoder.Set(result);
                    }
                }
            }
        }
    }
}

} //namespace armnn