//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "InferenceTestImage.hpp"

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <fmt/format.h>

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <tuple>
#include <vector>

#define STB_IMAGE_IMPLEMENTATION
#include <stb/stb_image.h>

#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include <stb/stb_image_resize.h>

#define STB_IMAGE_WRITE_IMPLEMENTATION
#include <stb/stb_image_write.h>

namespace
{

27unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
28{
29 switch (channelLayout)
30 {
31 case ImageChannelLayout::Rgb:
32 return static_cast<unsigned int>(channel);
33 case ImageChannelLayout::Bgr:
34 return 2u - static_cast<unsigned int>(channel);
35 default:
James Ward08f40162020-09-07 16:45:07 +010036 throw UnknownImageChannelLayout(fmt::format("Unknown layout {}", static_cast<int>(channelLayout)));
telsoa014fcda012018-03-09 14:13:49 +000037 }
38}
39
/// Linear interpolation between two values.
///
/// @param a  Value returned when @p w is 0.
/// @param b  Value returned when @p w is 1.
/// @param w  Interpolation weight applied to @p b; (1 - w) is applied to @p a.
/// @return w * b + (1 - w) * a.
inline float Lerp(float a, float b, float w)
{
    return w * b + (1.f - w) * a;
}

/// Stores one channel value into an interleaved, 3-channels-per-pixel, row-major float buffer.
///
/// @param data   Destination buffer; must already be large enough for the computed index.
/// @param width  Number of pixels per row of the destination image.
/// @param x      Column of the pixel.
/// @param y      Row of the pixel.
/// @param c      Channel within the pixel (0..2).
/// @param value  Value to store.
inline void PutData(std::vector<float> & data,
                    const unsigned int width,
                    const unsigned int x,
                    const unsigned int y,
                    const unsigned int c,
                    float value)
{
    // The index is computed in std::size_t so that large images cannot wrap around in 32-bit arithmetic.
    const std::size_t pixelIndex = static_cast<std::size_t>(y) * width + x;
    data[3u * pixelIndex + c] = value;
}

55std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
56 const unsigned int outputWidth,
57 const unsigned int outputHeight,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +010058 const float scale,
telsoa01c577f2c2018-08-31 09:22:23 +010059 const std::array<float, 3>& mean,
60 const std::array<float, 3>& stddev)
61{
62 std::vector<float> out;
63 out.resize(outputWidth * outputHeight * 3);
64
65 // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
66 // image is projected into the input image to figure out the interpolants and weights. Note that this
67 // will yield different results than if projecting the centre of output texels.
68
69 const unsigned int inputWidth = image.GetWidth();
70 const unsigned int inputHeight = image.GetHeight();
71
72 // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
73 // in the input image.
Matthew Sloyan24ac8592020-09-23 16:57:23 +010074 const float scaleY = armnn::numeric_cast<float>(inputHeight) / armnn::numeric_cast<float>(outputHeight);
75 const float scaleX = armnn::numeric_cast<float>(inputWidth) / armnn::numeric_cast<float>(outputWidth);
telsoa01c577f2c2018-08-31 09:22:23 +010076
77 uint8_t rgb_x0y0[3];
78 uint8_t rgb_x1y0[3];
79 uint8_t rgb_x0y1[3];
80 uint8_t rgb_x1y1[3];
81
82 for (unsigned int y = 0; y < outputHeight; ++y)
83 {
84 // Corresponding real-valued height coordinate in input image.
Matthew Sloyan24ac8592020-09-23 16:57:23 +010085 const float iy = armnn::numeric_cast<float>(y) * scaleY;
telsoa01c577f2c2018-08-31 09:22:23 +010086
87 // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
88 const float fiy = floorf(iy);
Matthew Sloyan24ac8592020-09-23 16:57:23 +010089 const unsigned int y0 = armnn::numeric_cast<unsigned int>(fiy);
telsoa01c577f2c2018-08-31 09:22:23 +010090
91 // Interpolation weight (range [0,1])
92 const float yw = iy - fiy;
93
94 for (unsigned int x = 0; x < outputWidth; ++x)
95 {
96 // Real-valued and discrete width coordinates in input image.
Matthew Sloyan24ac8592020-09-23 16:57:23 +010097 const float ix = armnn::numeric_cast<float>(x) * scaleX;
telsoa01c577f2c2018-08-31 09:22:23 +010098 const float fix = floorf(ix);
Matthew Sloyan24ac8592020-09-23 16:57:23 +010099 const unsigned int x0 = armnn::numeric_cast<unsigned int>(fix);
telsoa01c577f2c2018-08-31 09:22:23 +0100100
101 // Interpolation weight (range [0,1]).
102 const float xw = ix - fix;
103
104 // Discrete width/height coordinates of texels below and to the right of (x0, y0).
105 const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
106 const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
107
108 std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
109 std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
110 std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
111 std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
112
113 for (unsigned c=0; c<3; ++c)
114 {
115 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
116 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
117 const float l = Lerp(ly0, ly1, yw);
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100118 PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
telsoa01c577f2c2018-08-31 09:22:23 +0100119 }
120 }
121 }
122 return out;
123}
124
} // namespace

127InferenceTestImage::InferenceTestImage(char const* filePath)
128 : m_Width(0u)
129 , m_Height(0u)
130 , m_NumChannels(0u)
131{
132 int width;
133 int height;
134 int channels;
135
136 using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
137 StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
138
139 if (stbData == nullptr)
140 {
James Ward08f40162020-09-07 16:45:07 +0100141 throw InferenceTestImageLoadFailed(fmt::format("Could not load the image at {}", filePath));
telsoa014fcda012018-03-09 14:13:49 +0000142 }
143
144 if (width == 0 || height == 0)
145 {
James Ward08f40162020-09-07 16:45:07 +0100146 throw InferenceTestImageLoadFailed(fmt::format("Could not load empty image at {}", filePath));
telsoa014fcda012018-03-09 14:13:49 +0000147 }
148
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100149 m_Width = armnn::numeric_cast<unsigned int>(width);
150 m_Height = armnn::numeric_cast<unsigned int>(height);
151 m_NumChannels = armnn::numeric_cast<unsigned int>(channels);
telsoa014fcda012018-03-09 14:13:49 +0000152
153 const unsigned int sizeInBytes = GetSizeInBytes();
154 m_Data.resize(sizeInBytes);
155 memcpy(m_Data.data(), stbData.get(), sizeInBytes);
156}
157
158std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
159{
160 if (x >= m_Width || y >= m_Height)
161 {
James Ward08f40162020-09-07 16:45:07 +0100162 throw InferenceTestImageOutOfBoundsAccess(fmt::format("Attempted out of bounds image access. "
163 "Requested ({0}, {1}). Maximum valid coordinates ({2}, {3}).", x, y, (m_Width - 1), (m_Height - 1)));
telsoa014fcda012018-03-09 14:13:49 +0000164 }
165
166 const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
167 const uint8_t* const pixelData = m_Data.data() + pixelOffset;
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100168 ARMNN_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
telsoa014fcda012018-03-09 14:13:49 +0000169
170 std::array<uint8_t, 3> outPixelData;
171 outPixelData.fill(0);
172
173 const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
174 for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
175 {
176 outPixelData[c] = pixelData[c];
177 }
178
179 return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
180}
181
telsoa01c577f2c2018-08-31 09:22:23 +0100182
183void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
telsoa014fcda012018-03-09 14:13:49 +0000184{
telsoa01c577f2c2018-08-31 09:22:23 +0100185 std::vector<uint8_t> newData;
186 newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
187
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100188 // armnn::numeric_cast<>() is used for user-provided data (protecting about overflows).
telsoa01c577f2c2018-08-31 09:22:23 +0100189 // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
Matthew Sloyan80c6b142020-09-08 12:00:32 +0100190 // a armnn::numeric_cast<>() handled the conversion).
191 const int nW = armnn::numeric_cast<int>(newWidth);
192 const int nH = armnn::numeric_cast<int>(newHeight);
telsoa01c577f2c2018-08-31 09:22:23 +0100193
194 const int w = static_cast<int>(im.GetWidth());
195 const int h = static_cast<int>(im.GetHeight());
196 const int numChannels = static_cast<int>(im.GetNumChannels());
197
198 const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
199 if (res == 0)
200 {
201 throw InferenceTestImageResizeFailed("The resizing operation failed");
202 }
203
204 im.m_Data.swap(newData);
205 im.m_Width = newWidth;
206 im.m_Height = newHeight;
207}
208
209std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
210 unsigned int newHeight,
211 const armnn::CheckLocation& location,
212 const ResizingMethods meth,
213 const std::array<float, 3>& mean,
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100214 const std::array<float, 3>& stddev,
215 const float scale)
telsoa01c577f2c2018-08-31 09:22:23 +0100216{
217 std::vector<float> out;
telsoa014fcda012018-03-09 14:13:49 +0000218 if (newWidth == 0 || newHeight == 0)
219 {
James Ward08f40162020-09-07 16:45:07 +0100220 throw InferenceTestImageResizeFailed(fmt::format("None of the dimensions passed to a resize "
221 "operation can be zero. Requested width: {0}. Requested height: {1}.", newWidth, newHeight));
telsoa014fcda012018-03-09 14:13:49 +0000222 }
223
telsoa01c577f2c2018-08-31 09:22:23 +0100224 switch (meth) {
225 case ResizingMethods::STB:
226 {
227 StbResize(*this, newWidth, newHeight);
228 break;
229 }
230 case ResizingMethods::BilinearAndNormalized:
231 {
FinnWilliamsArmaf8b72d2019-05-22 14:50:55 +0100232 out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
telsoa01c577f2c2018-08-31 09:22:23 +0100233 break;
234 }
235 default:
James Ward08f40162020-09-07 16:45:07 +0100236 throw InferenceTestImageResizeFailed(fmt::format("Unknown resizing method asked ArmNN only"
237 " supports {STB, BilinearAndNormalized} {}",
238 location.AsString()));
telsoa014fcda012018-03-09 14:13:49 +0000239 }
telsoa01c577f2c2018-08-31 09:22:23 +0100240 return out;
telsoa014fcda012018-03-09 14:13:49 +0000241}
242
243void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
244{
245 const int w = static_cast<int>(GetWidth());
246 const int h = static_cast<int>(GetHeight());
247 const int numChannels = static_cast<int>(GetNumChannels());
248 int res = 0;
249
250 switch (format)
251 {
252 case WriteFormat::Png:
253 {
254 res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
255 break;
256 }
257 case WriteFormat::Bmp:
258 {
259 res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
260 break;
261 }
262 case WriteFormat::Tga:
263 {
264 res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
265 break;
266 }
267 default:
James Ward08f40162020-09-07 16:45:07 +0100268 throw InferenceTestImageWriteFailed(fmt::format("Unknown format {}", static_cast<int>(format)));
telsoa014fcda012018-03-09 14:13:49 +0000269 }
270
271 if (res == 0)
272 {
James Ward08f40162020-09-07 16:45:07 +0100273 throw InferenceTestImageWriteFailed(fmt::format("An error occurred when writing to file {}",
274 filePath));
telsoa014fcda012018-03-09 14:13:49 +0000275 }
276}
277
278template <typename TProcessValueCallable>
279std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
280 const InferenceTestImage& image,
281 TProcessValueCallable processValue)
282{
283 const unsigned int h = image.GetHeight();
284 const unsigned int w = image.GetWidth();
285
286 std::vector<float> imageData;
287 imageData.resize(h * w * 3);
288
289 for (unsigned int j = 0; j < h; ++j)
290 {
291 for (unsigned int i = 0; i < w; ++i)
292 {
293 uint8_t r, g, b;
294 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
295
296 // ArmNN order: C, H, W
297 const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
298 const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
299 const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
300
301 imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
302 imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
303 imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
304 }
305 }
306
307 return imageData;
308}
309
310std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
311 const InferenceTestImage& image)
312{
313 return GetImageDataInArmNnLayoutAsFloats(layout, image,
314 [](ImageChannel channel, float value)
315 {
Jan Eilers8eb25602020-03-09 12:13:48 +0000316 armnn::IgnoreUnused(channel);
telsoa014fcda012018-03-09 14:13:49 +0000317 return value / 255.f;
318 });
319}
320
321std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
322 const InferenceTestImage& image,
323 const std::array<float, 3>& mean)
324{
325 return GetImageDataInArmNnLayoutAsFloats(layout, image,
326 [layout, &mean](ImageChannel channel, float value)
327 {
328 const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
329 return value - mean[channelIndex];
330 });
331}
surmeh01bceff2f2018-03-29 16:29:27 +0100332
333std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
334 const InferenceTestImage& image)
335{
336 std::vector<float> imageData;
337 const unsigned int h = image.GetHeight();
338 const unsigned int w = image.GetWidth();
339
340 const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
341 const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
342 const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
343
344 imageData.resize(h * w * 3);
345 unsigned int offset = 0;
346
347 for (unsigned int j = 0; j < h; ++j)
348 {
349 for (unsigned int i = 0; i < w; ++i)
350 {
351 uint8_t r, g, b;
352 std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
353
354 imageData[offset+rDstIndex] = float(r) / 255.0f;
355 imageData[offset+gDstIndex] = float(g) / 255.0f;
356 imageData[offset+bDstIndex] = float(b) / 255.0f;
357 offset += 3;
358 }
359 }
360
361 return imageData;
telsoa01c577f2c2018-08-31 09:22:23 +0100362}