blob: 1cf73caf45c5fcc5fc687d4f6ceb5aa84aab918a [file] [log] [blame]
telsoa014fcda012018-03-09 14:13:49 +00001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa014fcda012018-03-09 14:13:49 +00004//
5#include "InferenceTestImage.hpp"
6
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +01007#include <armnn/utility/Assert.hpp>
Jan Eilers8eb25602020-03-09 12:13:48 +00008#include <armnn/utility/IgnoreUnused.hpp>
9
telsoa014fcda012018-03-09 14:13:49 +000010#include <boost/format.hpp>
telsoa014fcda012018-03-09 14:13:49 +000011#include <boost/numeric/conversion/cast.hpp>
12
13#include <array>
14
15#define STB_IMAGE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010016#include <stb/stb_image.h>
telsoa014fcda012018-03-09 14:13:49 +000017
18#define STB_IMAGE_RESIZE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010019#include <stb/stb_image_resize.h>
telsoa014fcda012018-03-09 14:13:49 +000020
21#define STB_IMAGE_WRITE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010022#include <stb/stb_image_write.h>
telsoa014fcda012018-03-09 14:13:49 +000023
24namespace
25{
26
27unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
28{
29 switch (channelLayout)
30 {
31 case ImageChannelLayout::Rgb:
32 return static_cast<unsigned int>(channel);
33 case ImageChannelLayout::Bgr:
34 return 2u - static_cast<unsigned int>(channel);
35 default:
36 throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
37 % static_cast<int>(channelLayout)));
38 }
39}
40
// Linear interpolation between a and b: a weight of 0 yields a, a weight of 1 yields b.
inline float Lerp(float a, float b, float w)
{
    const float contributionOfB = w * b;
    const float contributionOfA = (1.f - w) * a;
    return contributionOfB + contributionOfA;
}
45
// Writes value into channel c of pixel (x, y) of a buffer holding rows of `width` interleaved
// 3-channel pixels. No bounds checking is performed.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    const unsigned int pixelIndex = (y * width) + x;
    const unsigned int elementIndex = (3 * pixelIndex) + c;
    data[elementIndex] = value;
}
55
56std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
57 const unsigned int outputWidth,
58 const unsigned int outputHeight,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +010059 const float scale,
telsoa01c577f2c2018-08-31 09:22:23 +010060 const std::array<float, 3>& mean,
61 const std::array<float, 3>& stddev)
62{
63 std::vector<float> out;
64 out.resize(outputWidth * outputHeight * 3);
65
66 // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
67 // image is projected into the input image to figure out the interpolants and weights. Note that this
68 // will yield different results than if projecting the centre of output texels.
69
70 const unsigned int inputWidth = image.GetWidth();
71 const unsigned int inputHeight = image.GetHeight();
72
73 // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
74 // in the input image.
75 const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
76 const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
77
78 uint8_t rgb_x0y0[3];
79 uint8_t rgb_x1y0[3];
80 uint8_t rgb_x0y1[3];
81 uint8_t rgb_x1y1[3];
82
83 for (unsigned int y = 0; y < outputHeight; ++y)
84 {
85 // Corresponding real-valued height coordinate in input image.
86 const float iy = boost::numeric_cast<float>(y) * scaleY;
87
88 // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
89 const float fiy = floorf(iy);
90 const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
91
92 // Interpolation weight (range [0,1])
93 const float yw = iy - fiy;
94
95 for (unsigned int x = 0; x < outputWidth; ++x)
96 {
97 // Real-valued and discrete width coordinates in input image.
98 const float ix = boost::numeric_cast<float>(x) * scaleX;
99 const float fix = floorf(ix);
100 const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
101
102 // Interpolation weight (range [0,1]).
103 const float xw = ix - fix;
104
105 // Discrete width/height coordinates of texels below and to the right of (x0, y0).
106 const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
107 const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
108
109 std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
110 std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
111 std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
112 std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
113
114 for (unsigned c=0; c<3; ++c)
115 {
116 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
117 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
118 const float l = Lerp(ly0, ly1, yw);
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100119 PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
telsoa01c577f2c2018-08-31 09:22:23 +0100120 }
121 }
122 }
123 return out;
124}
125
telsoa014fcda012018-03-09 14:13:49 +0000126} // namespace
127
128InferenceTestImage::InferenceTestImage(char const* filePath)
129 : m_Width(0u)
130 , m_Height(0u)
131 , m_NumChannels(0u)
132{
133 int width;
134 int height;
135 int channels;
136
137 using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
138 StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
139
140 if (stbData == nullptr)
141 {
142 throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
143 }
144
145 if (width == 0 || height == 0)
146 {
147 throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
148 }
149
150 m_Width = boost::numeric_cast<unsigned int>(width);
151 m_Height = boost::numeric_cast<unsigned int>(height);
152 m_NumChannels = boost::numeric_cast<unsigned int>(channels);
153
154 const unsigned int sizeInBytes = GetSizeInBytes();
155 m_Data.resize(sizeInBytes);
156 memcpy(m_Data.data(), stbData.get(), sizeInBytes);
157}
158
159std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
160{
161 if (x >= m_Width || y >= m_Height)
162 {
163 throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
164 "Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
165 }
166
167 const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
168 const uint8_t* const pixelData = m_Data.data() + pixelOffset;
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100169 ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
telsoa014fcda012018-03-09 14:13:49 +0000170
171 std::array<uint8_t, 3> outPixelData;
172 outPixelData.fill(0);
173
174 const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
175 for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
176 {
177 outPixelData[c] = pixelData[c];
178 }
179
180 return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
181}
182
telsoa01c577f2c2018-08-31 09:22:23 +0100183
184void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
telsoa014fcda012018-03-09 14:13:49 +0000185{
telsoa01c577f2c2018-08-31 09:22:23 +0100186 std::vector<uint8_t> newData;
187 newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
188
189 // boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
190 // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
191 // a boost::numeric_cast<>() handled the conversion).
192 const int nW = boost::numeric_cast<int>(newWidth);
193 const int nH = boost::numeric_cast<int>(newHeight);
194
195 const int w = static_cast<int>(im.GetWidth());
196 const int h = static_cast<int>(im.GetHeight());
197 const int numChannels = static_cast<int>(im.GetNumChannels());
198
199 const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
200 if (res == 0)
201 {
202 throw InferenceTestImageResizeFailed("The resizing operation failed");
203 }
204
205 im.m_Data.swap(newData);
206 im.m_Width = newWidth;
207 im.m_Height = newHeight;
208}
209
210std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
211 unsigned int newHeight,
212 const armnn::CheckLocation& location,
213 const ResizingMethods meth,
214 const std::array<float, 3>& mean,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100215 const std::array<float, 3>& stddev,
216 const float scale)
telsoa01c577f2c2018-08-31 09:22:23 +0100217{
218 std::vector<float> out;
telsoa014fcda012018-03-09 14:13:49 +0000219 if (newWidth == 0 || newHeight == 0)
220 {
221 throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
222 "operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
223 }
224
telsoa01c577f2c2018-08-31 09:22:23 +0100225 switch (meth) {
226 case ResizingMethods::STB:
227 {
228 StbResize(*this, newWidth, newHeight);
229 break;
230 }
231 case ResizingMethods::BilinearAndNormalized:
232 {
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100233 out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
telsoa01c577f2c2018-08-31 09:22:23 +0100234 break;
235 }
236 default:
237 throw InferenceTestImageResizeFailed(boost::str(
238 boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
239 % location.AsString()));
telsoa014fcda012018-03-09 14:13:49 +0000240 }
telsoa01c577f2c2018-08-31 09:22:23 +0100241 return out;
telsoa014fcda012018-03-09 14:13:49 +0000242}
243
244void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
245{
246 const int w = static_cast<int>(GetWidth());
247 const int h = static_cast<int>(GetHeight());
248 const int numChannels = static_cast<int>(GetNumChannels());
249 int res = 0;
250
251 switch (format)
252 {
253 case WriteFormat::Png:
254 {
255 res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
256 break;
257 }
258 case WriteFormat::Bmp:
259 {
260 res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
261 break;
262 }
263 case WriteFormat::Tga:
264 {
265 res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
266 break;
267 }
268 default:
269 throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
270 % static_cast<int>(format)));
271 }
272
273 if (res == 0)
274 {
275 throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
276 % filePath));
277 }
278}
279
280template <typename TProcessValueCallable>
281std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
282 const InferenceTestImage& image,
283 TProcessValueCallable processValue)
284{
285 const unsigned int h = image.GetHeight();
286 const unsigned int w = image.GetWidth();
287
288 std::vector<float> imageData;
289 imageData.resize(h * w * 3);
290
291 for (unsigned int j = 0; j < h; ++j)
292 {
293 for (unsigned int i = 0; i < w; ++i)
294 {
295 uint8_t r, g, b;
296 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
297
298 // ArmNN order: C, H, W
299 const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
300 const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
301 const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
302
303 imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
304 imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
305 imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
306 }
307 }
308
309 return imageData;
310}
311
312std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
313 const InferenceTestImage& image)
314{
315 return GetImageDataInArmNnLayoutAsFloats(layout, image,
316 [](ImageChannel channel, float value)
317 {
Jan Eilers8eb25602020-03-09 12:13:48 +0000318 armnn::IgnoreUnused(channel);
telsoa014fcda012018-03-09 14:13:49 +0000319 return value / 255.f;
320 });
321}
322
323std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
324 const InferenceTestImage& image,
325 const std::array<float, 3>& mean)
326{
327 return GetImageDataInArmNnLayoutAsFloats(layout, image,
328 [layout, &mean](ImageChannel channel, float value)
329 {
330 const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
331 return value - mean[channelIndex];
332 });
333}
surmeh01bceff2f2018-03-29 16:29:27 +0100334
335std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
336 const InferenceTestImage& image)
337{
338 std::vector<float> imageData;
339 const unsigned int h = image.GetHeight();
340 const unsigned int w = image.GetWidth();
341
342 const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
343 const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
344 const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
345
346 imageData.resize(h * w * 3);
347 unsigned int offset = 0;
348
349 for (unsigned int j = 0; j < h; ++j)
350 {
351 for (unsigned int i = 0; i < w; ++i)
352 {
353 uint8_t r, g, b;
354 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
355
356 imageData[offset+rDstIndex] = float(r) / 255.0f;
357 imageData[offset+gDstIndex] = float(g) / 255.0f;
358 imageData[offset+bDstIndex] = float(b) / 255.0f;
359 offset += 3;
360 }
361 }
362
363 return imageData;
telsoa01c577f2c2018-08-31 09:22:23 +0100364}