Blame - tests/InferenceTestImage.cpp - ml/armnn

blob: b011e6ac8f1627276328c2c18d97c04113059079 [file] [log] [blame]

telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	1	//
				2	// Copyright © 2017 Arm Ltd. All rights reserved.
David Beck	ecb56cd	2018-09-05 12:52:57 +0100	[diff] [blame]	3	// SPDX-License-Identifier: MIT
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	4	//
				5	#include "InferenceTestImage.hpp"
				6
				7	#include <boost/core/ignore_unused.hpp>
				8	#include <boost/format.hpp>
				9	#include <boost/core/ignore_unused.hpp>
				10	#include <boost/numeric/conversion/cast.hpp>
				11
				12	#include <array>
				13
				14	#define STB_IMAGE_IMPLEMENTATION
Sadik Armagan	93e2e40	2019-05-02 09:31:38 +0100	[diff] [blame^]	15	#include <stb/stb_image.h>
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	16
				17	#define STB_IMAGE_RESIZE_IMPLEMENTATION
Sadik Armagan	93e2e40	2019-05-02 09:31:38 +0100	[diff] [blame^]	18	#include <stb/stb_image_resize.h>
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	19
				20	#define STB_IMAGE_WRITE_IMPLEMENTATION
Sadik Armagan	93e2e40	2019-05-02 09:31:38 +0100	[diff] [blame^]	21	#include <stb/stb_image_write.h>
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	22
				23	namespace
				24	{
				25
				26	unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
				27	{
				28	switch (channelLayout)
				29	{
				30	case ImageChannelLayout::Rgb:
				31	return static_cast<unsigned int>(channel);
				32	case ImageChannelLayout::Bgr:
				33	return 2u - static_cast<unsigned int>(channel);
				34	default:
				35	throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
				36	% static_cast<int>(channelLayout)));
				37	}
				38	}
				39
// Linear interpolation between a and b: w == 0 yields a, w == 1 yields b.
inline float Lerp(float a, float b, float w)
{
    const float towardsB = w * b;
    const float towardsA = (1.f - w) * a;
    return towardsB + towardsA;
}
				44
				45	inline void PutData(std::vector<float> & data,
				46	const unsigned int width,
				47	const unsigned int x,
				48	const unsigned int y,
				49	const unsigned int c,
				50	float value)
				51	{
				52	data[(3((ywidth)+x)) + c] = value;
				53	}
				54
				55	std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
				56	const unsigned int outputWidth,
				57	const unsigned int outputHeight,
				58	const std::array<float, 3>& mean,
				59	const std::array<float, 3>& stddev)
				60	{
				61	std::vector<float> out;
				62	out.resize(outputWidth * outputHeight * 3);
				63
				64	// We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
				65	// image is projected into the input image to figure out the interpolants and weights. Note that this
				66	// will yield different results than if projecting the centre of output texels.
				67
				68	const unsigned int inputWidth = image.GetWidth();
				69	const unsigned int inputHeight = image.GetHeight();
				70
				71	// How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
				72	// in the input image.
				73	const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
				74	const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
				75
				76	uint8_t rgb_x0y0[3];
				77	uint8_t rgb_x1y0[3];
				78	uint8_t rgb_x0y1[3];
				79	uint8_t rgb_x1y1[3];
				80
				81	for (unsigned int y = 0; y < outputHeight; ++y)
				82	{
				83	// Corresponding real-valued height coordinate in input image.
				84	const float iy = boost::numeric_cast<float>(y) * scaleY;
				85
				86	// Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
				87	const float fiy = floorf(iy);
				88	const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
				89
				90	// Interpolation weight (range [0,1])
				91	const float yw = iy - fiy;
				92
				93	for (unsigned int x = 0; x < outputWidth; ++x)
				94	{
				95	// Real-valued and discrete width coordinates in input image.
				96	const float ix = boost::numeric_cast<float>(x) * scaleX;
				97	const float fix = floorf(ix);
				98	const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
				99
				100	// Interpolation weight (range [0,1]).
				101	const float xw = ix - fix;
				102
				103	// Discrete width/height coordinates of texels below and to the right of (x0, y0).
				104	const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
				105	const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
				106
				107	std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
				108	std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
				109	std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
				110	std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
				111
				112	for (unsigned c=0; c<3; ++c)
				113	{
				114	const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
				115	const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
				116	const float l = Lerp(ly0, ly1, yw);
				117	PutData(out, outputWidth, x, y, c, ((l/255.0f) - mean[c])/stddev[c]);
				118	}
				119	}
				120	}
				121	return out;
				122	}
				123
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	124	} // namespace
				125
				126	InferenceTestImage::InferenceTestImage(char const* filePath)
				127	: m_Width(0u)
				128	, m_Height(0u)
				129	, m_NumChannels(0u)
				130	{
				131	int width;
				132	int height;
				133	int channels;
				134
				135	using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
				136	StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
				137
				138	if (stbData == nullptr)
				139	{
				140	throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
				141	}
				142
				143	if (width == 0 \|\| height == 0)
				144	{
				145	throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
				146	}
				147
				148	m_Width = boost::numeric_cast<unsigned int>(width);
				149	m_Height = boost::numeric_cast<unsigned int>(height);
				150	m_NumChannels = boost::numeric_cast<unsigned int>(channels);
				151
				152	const unsigned int sizeInBytes = GetSizeInBytes();
				153	m_Data.resize(sizeInBytes);
				154	memcpy(m_Data.data(), stbData.get(), sizeInBytes);
				155	}
				156
				157	std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
				158	{
				159	if (x >= m_Width \|\| y >= m_Height)
				160	{
				161	throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
				162	"Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
				163	}
				164
				165	const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
				166	const uint8_t* const pixelData = m_Data.data() + pixelOffset;
				167	BOOST_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
				168
				169	std::array<uint8_t, 3> outPixelData;
				170	outPixelData.fill(0);
				171
				172	const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
				173	for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
				174	{
				175	outPixelData[c] = pixelData[c];
				176	}
				177
				178	return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
				179	}
				180
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	181
				182	void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	183	{
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	184	std::vector<uint8_t> newData;
				185	newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
				186
				187	// boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
				188	// static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
				189	// a boost::numeric_cast<>() handled the conversion).
				190	const int nW = boost::numeric_cast<int>(newWidth);
				191	const int nH = boost::numeric_cast<int>(newHeight);
				192
				193	const int w = static_cast<int>(im.GetWidth());
				194	const int h = static_cast<int>(im.GetHeight());
				195	const int numChannels = static_cast<int>(im.GetNumChannels());
				196
				197	const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
				198	if (res == 0)
				199	{
				200	throw InferenceTestImageResizeFailed("The resizing operation failed");
				201	}
				202
				203	im.m_Data.swap(newData);
				204	im.m_Width = newWidth;
				205	im.m_Height = newHeight;
				206	}
				207
				208	std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
				209	unsigned int newHeight,
				210	const armnn::CheckLocation& location,
				211	const ResizingMethods meth,
				212	const std::array<float, 3>& mean,
				213	const std::array<float, 3>& stddev)
				214	{
				215	std::vector<float> out;
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	216	if (newWidth == 0 \|\| newHeight == 0)
				217	{
				218	throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
				219	"operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
				220	}
				221
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	222	switch (meth) {
				223	case ResizingMethods::STB:
				224	{
				225	StbResize(*this, newWidth, newHeight);
				226	break;
				227	}
				228	case ResizingMethods::BilinearAndNormalized:
				229	{
				230	out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, mean, stddev);
				231	break;
				232	}
				233	default:
				234	throw InferenceTestImageResizeFailed(boost::str(
				235	boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
				236	% location.AsString()));
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	237	}
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	238	return out;
telsoa01	4fcda01	2018-03-09 14:13:49 +0000	[diff] [blame]	239	}
				240
				241	void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
				242	{
				243	const int w = static_cast<int>(GetWidth());
				244	const int h = static_cast<int>(GetHeight());
				245	const int numChannels = static_cast<int>(GetNumChannels());
				246	int res = 0;
				247
				248	switch (format)
				249	{
				250	case WriteFormat::Png:
				251	{
				252	res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
				253	break;
				254	}
				255	case WriteFormat::Bmp:
				256	{
				257	res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
				258	break;
				259	}
				260	case WriteFormat::Tga:
				261	{
				262	res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
				263	break;
				264	}
				265	default:
				266	throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
				267	% static_cast<int>(format)));
				268	}
				269
				270	if (res == 0)
				271	{
				272	throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
				273	% filePath));
				274	}
				275	}
				276
				277	template <typename TProcessValueCallable>
				278	std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
				279	const InferenceTestImage& image,
				280	TProcessValueCallable processValue)
				281	{
				282	const unsigned int h = image.GetHeight();
				283	const unsigned int w = image.GetWidth();
				284
				285	std::vector<float> imageData;
				286	imageData.resize(h * w * 3);
				287
				288	for (unsigned int j = 0; j < h; ++j)
				289	{
				290	for (unsigned int i = 0; i < w; ++i)
				291	{
				292	uint8_t r, g, b;
				293	std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
				294
				295	// ArmNN order: C, H, W
				296	const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
				297	const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
				298	const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
				299
				300	imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
				301	imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
				302	imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
				303	}
				304	}
				305
				306	return imageData;
				307	}
				308
				309	std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
				310	const InferenceTestImage& image)
				311	{
				312	return GetImageDataInArmNnLayoutAsFloats(layout, image,
				313	[](ImageChannel channel, float value)
				314	{
				315	boost::ignore_unused(channel);
				316	return value / 255.f;
				317	});
				318	}
				319
				320	std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
				321	const InferenceTestImage& image,
				322	const std::array<float, 3>& mean)
				323	{
				324	return GetImageDataInArmNnLayoutAsFloats(layout, image,
				325	[layout, &mean](ImageChannel channel, float value)
				326	{
				327	const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
				328	return value - mean[channelIndex];
				329	});
				330	}
surmeh01	bceff2f	2018-03-29 16:29:27 +0100	[diff] [blame]	331
				332	std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
				333	const InferenceTestImage& image)
				334	{
				335	std::vector<float> imageData;
				336	const unsigned int h = image.GetHeight();
				337	const unsigned int w = image.GetWidth();
				338
				339	const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
				340	const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
				341	const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
				342
				343	imageData.resize(h * w * 3);
				344	unsigned int offset = 0;
				345
				346	for (unsigned int j = 0; j < h; ++j)
				347	{
				348	for (unsigned int i = 0; i < w; ++i)
				349	{
				350	uint8_t r, g, b;
				351	std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
				352
				353	imageData[offset+rDstIndex] = float(r) / 255.0f;
				354	imageData[offset+gDstIndex] = float(g) / 255.0f;
				355	imageData[offset+bDstIndex] = float(b) / 255.0f;
				356	offset += 3;
				357	}
				358	}
				359
				360	return imageData;
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	361	}