blob: b011e6ac8f1627276328c2c18d97c04113059079 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5#include "InferenceTestImage.hpp"
6
7#include <boost/core/ignore_unused.hpp>
8#include <boost/format.hpp>
9#include <boost/core/ignore_unused.hpp>
10#include <boost/numeric/conversion/cast.hpp>
11
12#include <array>
13
14#define STB_IMAGE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010015#include <stb/stb_image.h>
telsoa014fcda012018-03-09 14:13:49 +000016
17#define STB_IMAGE_RESIZE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010018#include <stb/stb_image_resize.h>
telsoa014fcda012018-03-09 14:13:49 +000019
20#define STB_IMAGE_WRITE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010021#include <stb/stb_image_write.h>
telsoa014fcda012018-03-09 14:13:49 +000022
23namespace
24{
25
26unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
27{
28 switch (channelLayout)
29 {
30 case ImageChannelLayout::Rgb:
31 return static_cast<unsigned int>(channel);
32 case ImageChannelLayout::Bgr:
33 return 2u - static_cast<unsigned int>(channel);
34 default:
35 throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
36 % static_cast<int>(channelLayout)));
37 }
38}
39
// Linear interpolation between 'a' and 'b': a weight of 0 yields 'a', a weight of 1 yields 'b'.
inline float Lerp(float a, float b, float w)
{
    // Weight applied to 'a'; 'b' receives the remaining share 'w'.
    const float weightOfA = 1.f - w;
    return w * b + weightOfA * a;
}
44
// Stores 'value' for channel 'c' of the pixel at (x, y) in a buffer that keeps three consecutive
// floats per pixel, with rows of 'width' pixels laid out one after another.
// No bounds checking is performed: 'data' must already be large enough.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    const unsigned int pixelIndex = (y * width) + x;
    const unsigned int elementIndex = (3 * pixelIndex) + c;
    data[elementIndex] = value;
}
54
55std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
56 const unsigned int outputWidth,
57 const unsigned int outputHeight,
58 const std::array<float, 3>& mean,
59 const std::array<float, 3>& stddev)
60{
61 std::vector<float> out;
62 out.resize(outputWidth * outputHeight * 3);
63
64 // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
65 // image is projected into the input image to figure out the interpolants and weights. Note that this
66 // will yield different results than if projecting the centre of output texels.
67
68 const unsigned int inputWidth = image.GetWidth();
69 const unsigned int inputHeight = image.GetHeight();
70
71 // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
72 // in the input image.
73 const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
74 const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
75
76 uint8_t rgb_x0y0[3];
77 uint8_t rgb_x1y0[3];
78 uint8_t rgb_x0y1[3];
79 uint8_t rgb_x1y1[3];
80
81 for (unsigned int y = 0; y < outputHeight; ++y)
82 {
83 // Corresponding real-valued height coordinate in input image.
84 const float iy = boost::numeric_cast<float>(y) * scaleY;
85
86 // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
87 const float fiy = floorf(iy);
88 const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
89
90 // Interpolation weight (range [0,1])
91 const float yw = iy - fiy;
92
93 for (unsigned int x = 0; x < outputWidth; ++x)
94 {
95 // Real-valued and discrete width coordinates in input image.
96 const float ix = boost::numeric_cast<float>(x) * scaleX;
97 const float fix = floorf(ix);
98 const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
99
100 // Interpolation weight (range [0,1]).
101 const float xw = ix - fix;
102
103 // Discrete width/height coordinates of texels below and to the right of (x0, y0).
104 const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
105 const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
106
107 std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
108 std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
109 std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
110 std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
111
112 for (unsigned c=0; c<3; ++c)
113 {
114 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
115 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
116 const float l = Lerp(ly0, ly1, yw);
117 PutData(out, outputWidth, x, y, c, ((l/255.0f) - mean[c])/stddev[c]);
118 }
119 }
120 }
121 return out;
122}
123
telsoa014fcda012018-03-09 14:13:49 +0000124} // namespace
125
126InferenceTestImage::InferenceTestImage(char const* filePath)
127 : m_Width(0u)
128 , m_Height(0u)
129 , m_NumChannels(0u)
130{
131 int width;
132 int height;
133 int channels;
134
135 using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
136 StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
137
138 if (stbData == nullptr)
139 {
140 throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
141 }
142
143 if (width == 0 || height == 0)
144 {
145 throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
146 }
147
148 m_Width = boost::numeric_cast<unsigned int>(width);
149 m_Height = boost::numeric_cast<unsigned int>(height);
150 m_NumChannels = boost::numeric_cast<unsigned int>(channels);
151
152 const unsigned int sizeInBytes = GetSizeInBytes();
153 m_Data.resize(sizeInBytes);
154 memcpy(m_Data.data(), stbData.get(), sizeInBytes);
155}
156
157std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
158{
159 if (x >= m_Width || y >= m_Height)
160 {
161 throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
162 "Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
163 }
164
165 const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
166 const uint8_t* const pixelData = m_Data.data() + pixelOffset;
167 BOOST_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
168
169 std::array<uint8_t, 3> outPixelData;
170 outPixelData.fill(0);
171
172 const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
173 for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
174 {
175 outPixelData[c] = pixelData[c];
176 }
177
178 return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
179}
180
telsoa01c577f2c2018-08-31 09:22:23 +0100181
182void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
telsoa014fcda012018-03-09 14:13:49 +0000183{
telsoa01c577f2c2018-08-31 09:22:23 +0100184 std::vector<uint8_t> newData;
185 newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
186
187 // boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
188 // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
189 // a boost::numeric_cast<>() handled the conversion).
190 const int nW = boost::numeric_cast<int>(newWidth);
191 const int nH = boost::numeric_cast<int>(newHeight);
192
193 const int w = static_cast<int>(im.GetWidth());
194 const int h = static_cast<int>(im.GetHeight());
195 const int numChannels = static_cast<int>(im.GetNumChannels());
196
197 const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
198 if (res == 0)
199 {
200 throw InferenceTestImageResizeFailed("The resizing operation failed");
201 }
202
203 im.m_Data.swap(newData);
204 im.m_Width = newWidth;
205 im.m_Height = newHeight;
206}
207
208std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
209 unsigned int newHeight,
210 const armnn::CheckLocation& location,
211 const ResizingMethods meth,
212 const std::array<float, 3>& mean,
213 const std::array<float, 3>& stddev)
214{
215 std::vector<float> out;
telsoa014fcda012018-03-09 14:13:49 +0000216 if (newWidth == 0 || newHeight == 0)
217 {
218 throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
219 "operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
220 }
221
telsoa01c577f2c2018-08-31 09:22:23 +0100222 switch (meth) {
223 case ResizingMethods::STB:
224 {
225 StbResize(*this, newWidth, newHeight);
226 break;
227 }
228 case ResizingMethods::BilinearAndNormalized:
229 {
230 out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, mean, stddev);
231 break;
232 }
233 default:
234 throw InferenceTestImageResizeFailed(boost::str(
235 boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
236 % location.AsString()));
telsoa014fcda012018-03-09 14:13:49 +0000237 }
telsoa01c577f2c2018-08-31 09:22:23 +0100238 return out;
telsoa014fcda012018-03-09 14:13:49 +0000239}
240
241void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
242{
243 const int w = static_cast<int>(GetWidth());
244 const int h = static_cast<int>(GetHeight());
245 const int numChannels = static_cast<int>(GetNumChannels());
246 int res = 0;
247
248 switch (format)
249 {
250 case WriteFormat::Png:
251 {
252 res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
253 break;
254 }
255 case WriteFormat::Bmp:
256 {
257 res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
258 break;
259 }
260 case WriteFormat::Tga:
261 {
262 res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
263 break;
264 }
265 default:
266 throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
267 % static_cast<int>(format)));
268 }
269
270 if (res == 0)
271 {
272 throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
273 % filePath));
274 }
275}
276
277template <typename TProcessValueCallable>
278std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
279 const InferenceTestImage& image,
280 TProcessValueCallable processValue)
281{
282 const unsigned int h = image.GetHeight();
283 const unsigned int w = image.GetWidth();
284
285 std::vector<float> imageData;
286 imageData.resize(h * w * 3);
287
288 for (unsigned int j = 0; j < h; ++j)
289 {
290 for (unsigned int i = 0; i < w; ++i)
291 {
292 uint8_t r, g, b;
293 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
294
295 // ArmNN order: C, H, W
296 const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
297 const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
298 const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
299
300 imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
301 imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
302 imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
303 }
304 }
305
306 return imageData;
307}
308
309std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
310 const InferenceTestImage& image)
311{
312 return GetImageDataInArmNnLayoutAsFloats(layout, image,
313 [](ImageChannel channel, float value)
314 {
315 boost::ignore_unused(channel);
316 return value / 255.f;
317 });
318}
319
320std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
321 const InferenceTestImage& image,
322 const std::array<float, 3>& mean)
323{
324 return GetImageDataInArmNnLayoutAsFloats(layout, image,
325 [layout, &mean](ImageChannel channel, float value)
326 {
327 const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
328 return value - mean[channelIndex];
329 });
330}
surmeh01bceff2f2018-03-29 16:29:27 +0100331
332std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
333 const InferenceTestImage& image)
334{
335 std::vector<float> imageData;
336 const unsigned int h = image.GetHeight();
337 const unsigned int w = image.GetWidth();
338
339 const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
340 const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
341 const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
342
343 imageData.resize(h * w * 3);
344 unsigned int offset = 0;
345
346 for (unsigned int j = 0; j < h; ++j)
347 {
348 for (unsigned int i = 0; i < w; ++i)
349 {
350 uint8_t r, g, b;
351 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
352
353 imageData[offset+rDstIndex] = float(r) / 255.0f;
354 imageData[offset+gDstIndex] = float(g) / 255.0f;
355 imageData[offset+bDstIndex] = float(b) / 255.0f;
356 offset += 3;
357 }
358 }
359
360 return imageData;
telsoa01c577f2c2018-08-31 09:22:23 +0100361}