//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5#include "InferenceTestImage.hpp"
6
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +01007#include <armnn/utility/Assert.hpp>
Jan Eilers8eb25602020-03-09 12:13:48 +00008#include <armnn/utility/IgnoreUnused.hpp>
Matthew Sloyan80c6b142020-09-08 12:00:32 +01009#include <armnn/utility/NumericCast.hpp>
Jan Eilers8eb25602020-03-09 12:13:48 +000010
telsoa014fcda012018-03-09 14:13:49 +000011#include <boost/numeric/conversion/cast.hpp>
James Ward08f40162020-09-07 16:45:07 +010012#include <fmt/format.h>
telsoa014fcda012018-03-09 14:13:49 +000013
14#include <array>
15
16#define STB_IMAGE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010017#include <stb/stb_image.h>
telsoa014fcda012018-03-09 14:13:49 +000018
19#define STB_IMAGE_RESIZE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010020#include <stb/stb_image_resize.h>
telsoa014fcda012018-03-09 14:13:49 +000021
22#define STB_IMAGE_WRITE_IMPLEMENTATION
Sadik Armagan93e2e402019-05-02 09:31:38 +010023#include <stb/stb_image_write.h>
telsoa014fcda012018-03-09 14:13:49 +000024
25namespace
26{
27
28unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
29{
30 switch (channelLayout)
31 {
32 case ImageChannelLayout::Rgb:
33 return static_cast<unsigned int>(channel);
34 case ImageChannelLayout::Bgr:
35 return 2u - static_cast<unsigned int>(channel);
36 default:
James Ward08f40162020-09-07 16:45:07 +010037 throw UnknownImageChannelLayout(fmt::format("Unknown layout {}", static_cast<int>(channelLayout)));
telsoa014fcda012018-03-09 14:13:49 +000038 }
39}
40
/// Linear interpolation between two values.
///
/// @param a Value returned when w is 0.
/// @param b Value returned when w is 1.
/// @param w Interpolation weight applied to b; (1 - w) is applied to a.
/// @return w * b + (1 - w) * a.
inline float Lerp(float a, float b, float w)
{
    const float weightOfA = 1.f - w;
    return w * b + weightOfA * a;
}
45
/// Stores one channel value of one pixel into a buffer holding 3 interleaved channels per pixel.
///
/// @param data  Destination buffer; must already be large enough, no bounds check is performed.
/// @param width Number of pixels per row of the destination image.
/// @param x     Column of the pixel.
/// @param y     Row of the pixel.
/// @param c     Channel offset within the pixel.
/// @param value Value to store.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    // Pixels are laid out row by row, three consecutive floats per pixel.
    const unsigned int pixelIndex = (y * width) + x;
    data[(3 * pixelIndex) + c] = value;
}
55
56std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
57 const unsigned int outputWidth,
58 const unsigned int outputHeight,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +010059 const float scale,
telsoa01c577f2c2018-08-31 09:22:23 +010060 const std::array<float, 3>& mean,
61 const std::array<float, 3>& stddev)
62{
63 std::vector<float> out;
64 out.resize(outputWidth * outputHeight * 3);
65
66 // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
67 // image is projected into the input image to figure out the interpolants and weights. Note that this
68 // will yield different results than if projecting the centre of output texels.
69
70 const unsigned int inputWidth = image.GetWidth();
71 const unsigned int inputHeight = image.GetHeight();
72
73 // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
74 // in the input image.
75 const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
76 const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
77
78 uint8_t rgb_x0y0[3];
79 uint8_t rgb_x1y0[3];
80 uint8_t rgb_x0y1[3];
81 uint8_t rgb_x1y1[3];
82
83 for (unsigned int y = 0; y < outputHeight; ++y)
84 {
85 // Corresponding real-valued height coordinate in input image.
86 const float iy = boost::numeric_cast<float>(y) * scaleY;
87
88 // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
89 const float fiy = floorf(iy);
90 const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
91
92 // Interpolation weight (range [0,1])
93 const float yw = iy - fiy;
94
95 for (unsigned int x = 0; x < outputWidth; ++x)
96 {
97 // Real-valued and discrete width coordinates in input image.
98 const float ix = boost::numeric_cast<float>(x) * scaleX;
99 const float fix = floorf(ix);
100 const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
101
102 // Interpolation weight (range [0,1]).
103 const float xw = ix - fix;
104
105 // Discrete width/height coordinates of texels below and to the right of (x0, y0).
106 const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
107 const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
108
109 std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
110 std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
111 std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
112 std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
113
114 for (unsigned c=0; c<3; ++c)
115 {
116 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
117 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
118 const float l = Lerp(ly0, ly1, yw);
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100119 PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
telsoa01c577f2c2018-08-31 09:22:23 +0100120 }
121 }
122 }
123 return out;
124}
125
telsoa014fcda012018-03-09 14:13:49 +0000126} // namespace
127
128InferenceTestImage::InferenceTestImage(char const* filePath)
129 : m_Width(0u)
130 , m_Height(0u)
131 , m_NumChannels(0u)
132{
133 int width;
134 int height;
135 int channels;
136
137 using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
138 StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
139
140 if (stbData == nullptr)
141 {
James Ward08f40162020-09-07 16:45:07 +0100142 throw InferenceTestImageLoadFailed(fmt::format("Could not load the image at {}", filePath));
telsoa014fcda012018-03-09 14:13:49 +0000143 }
144
145 if (width == 0 || height == 0)
146 {
James Ward08f40162020-09-07 16:45:07 +0100147 throw InferenceTestImageLoadFailed(fmt::format("Could not load empty image at {}", filePath));
telsoa014fcda012018-03-09 14:13:49 +0000148 }
149
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100150 m_Width = armnn::numeric_cast<unsigned int>(width);
151 m_Height = armnn::numeric_cast<unsigned int>(height);
152 m_NumChannels = armnn::numeric_cast<unsigned int>(channels);
telsoa014fcda012018-03-09 14:13:49 +0000153
154 const unsigned int sizeInBytes = GetSizeInBytes();
155 m_Data.resize(sizeInBytes);
156 memcpy(m_Data.data(), stbData.get(), sizeInBytes);
157}
158
159std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
160{
161 if (x >= m_Width || y >= m_Height)
162 {
James Ward08f40162020-09-07 16:45:07 +0100163 throw InferenceTestImageOutOfBoundsAccess(fmt::format("Attempted out of bounds image access. "
164 "Requested ({0}, {1}). Maximum valid coordinates ({2}, {3}).", x, y, (m_Width - 1), (m_Height - 1)));
telsoa014fcda012018-03-09 14:13:49 +0000165 }
166
167 const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
168 const uint8_t* const pixelData = m_Data.data() + pixelOffset;
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100169 ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
telsoa014fcda012018-03-09 14:13:49 +0000170
171 std::array<uint8_t, 3> outPixelData;
172 outPixelData.fill(0);
173
174 const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
175 for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
176 {
177 outPixelData[c] = pixelData[c];
178 }
179
180 return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
181}
182
telsoa01c577f2c2018-08-31 09:22:23 +0100183
184void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
telsoa014fcda012018-03-09 14:13:49 +0000185{
telsoa01c577f2c2018-08-31 09:22:23 +0100186 std::vector<uint8_t> newData;
187 newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
188
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100189 // armnn::numeric_cast<>() is used for user-provided data (protecting about overflows).
telsoa01c577f2c2018-08-31 09:22:23 +0100190 // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100191 // a armnn::numeric_cast<>() handled the conversion).
192 const int nW = armnn::numeric_cast<int>(newWidth);
193 const int nH = armnn::numeric_cast<int>(newHeight);
telsoa01c577f2c2018-08-31 09:22:23 +0100194
195 const int w = static_cast<int>(im.GetWidth());
196 const int h = static_cast<int>(im.GetHeight());
197 const int numChannels = static_cast<int>(im.GetNumChannels());
198
199 const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
200 if (res == 0)
201 {
202 throw InferenceTestImageResizeFailed("The resizing operation failed");
203 }
204
205 im.m_Data.swap(newData);
206 im.m_Width = newWidth;
207 im.m_Height = newHeight;
208}
209
210std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
211 unsigned int newHeight,
212 const armnn::CheckLocation& location,
213 const ResizingMethods meth,
214 const std::array<float, 3>& mean,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100215 const std::array<float, 3>& stddev,
216 const float scale)
telsoa01c577f2c2018-08-31 09:22:23 +0100217{
218 std::vector<float> out;
telsoa014fcda012018-03-09 14:13:49 +0000219 if (newWidth == 0 || newHeight == 0)
220 {
James Ward08f40162020-09-07 16:45:07 +0100221 throw InferenceTestImageResizeFailed(fmt::format("None of the dimensions passed to a resize "
222 "operation can be zero. Requested width: {0}. Requested height: {1}.", newWidth, newHeight));
telsoa014fcda012018-03-09 14:13:49 +0000223 }
224
telsoa01c577f2c2018-08-31 09:22:23 +0100225 switch (meth) {
226 case ResizingMethods::STB:
227 {
228 StbResize(*this, newWidth, newHeight);
229 break;
230 }
231 case ResizingMethods::BilinearAndNormalized:
232 {
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100233 out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
telsoa01c577f2c2018-08-31 09:22:23 +0100234 break;
235 }
236 default:
James Ward08f40162020-09-07 16:45:07 +0100237 throw InferenceTestImageResizeFailed(fmt::format("Unknown resizing method asked ArmNN only"
238 " supports {STB, BilinearAndNormalized} {}",
239 location.AsString()));
telsoa014fcda012018-03-09 14:13:49 +0000240 }
telsoa01c577f2c2018-08-31 09:22:23 +0100241 return out;
telsoa014fcda012018-03-09 14:13:49 +0000242}
243
244void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
245{
246 const int w = static_cast<int>(GetWidth());
247 const int h = static_cast<int>(GetHeight());
248 const int numChannels = static_cast<int>(GetNumChannels());
249 int res = 0;
250
251 switch (format)
252 {
253 case WriteFormat::Png:
254 {
255 res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
256 break;
257 }
258 case WriteFormat::Bmp:
259 {
260 res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
261 break;
262 }
263 case WriteFormat::Tga:
264 {
265 res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
266 break;
267 }
268 default:
James Ward08f40162020-09-07 16:45:07 +0100269 throw InferenceTestImageWriteFailed(fmt::format("Unknown format {}", static_cast<int>(format)));
telsoa014fcda012018-03-09 14:13:49 +0000270 }
271
272 if (res == 0)
273 {
James Ward08f40162020-09-07 16:45:07 +0100274 throw InferenceTestImageWriteFailed(fmt::format("An error occurred when writing to file {}",
275 filePath));
telsoa014fcda012018-03-09 14:13:49 +0000276 }
277}
278
279template <typename TProcessValueCallable>
280std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
281 const InferenceTestImage& image,
282 TProcessValueCallable processValue)
283{
284 const unsigned int h = image.GetHeight();
285 const unsigned int w = image.GetWidth();
286
287 std::vector<float> imageData;
288 imageData.resize(h * w * 3);
289
290 for (unsigned int j = 0; j < h; ++j)
291 {
292 for (unsigned int i = 0; i < w; ++i)
293 {
294 uint8_t r, g, b;
295 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
296
297 // ArmNN order: C, H, W
298 const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
299 const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
300 const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
301
302 imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
303 imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
304 imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
305 }
306 }
307
308 return imageData;
309}
310
311std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
312 const InferenceTestImage& image)
313{
314 return GetImageDataInArmNnLayoutAsFloats(layout, image,
315 [](ImageChannel channel, float value)
316 {
Jan Eilers8eb25602020-03-09 12:13:48 +0000317 armnn::IgnoreUnused(channel);
telsoa014fcda012018-03-09 14:13:49 +0000318 return value / 255.f;
319 });
320}
321
322std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
323 const InferenceTestImage& image,
324 const std::array<float, 3>& mean)
325{
326 return GetImageDataInArmNnLayoutAsFloats(layout, image,
327 [layout, &mean](ImageChannel channel, float value)
328 {
329 const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
330 return value - mean[channelIndex];
331 });
332}
surmeh01bceff2f2018-03-29 16:29:27 +0100333
334std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
335 const InferenceTestImage& image)
336{
337 std::vector<float> imageData;
338 const unsigned int h = image.GetHeight();
339 const unsigned int w = image.GetWidth();
340
341 const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
342 const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
343 const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
344
345 imageData.resize(h * w * 3);
346 unsigned int offset = 0;
347
348 for (unsigned int j = 0; j < h; ++j)
349 {
350 for (unsigned int i = 0; i < w; ++i)
351 {
352 uint8_t r, g, b;
353 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
354
355 imageData[offset+rDstIndex] = float(r) / 255.0f;
356 imageData[offset+gDstIndex] = float(g) / 255.0f;
357 imageData[offset+bDstIndex] = float(b) / 255.0f;
358 offset += 3;
359 }
360 }
361
362 return imageData;
telsoa01c577f2c2018-08-31 09:22:23 +0100363}