//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "MnistDatabase.hpp"

#include <boost/numeric/conversion/cast.hpp>
#include <boost/log/trivial.hpp>
#include <boost/assert.hpp>
#include <fstream>
#include <ios>
#include <vector>

12
// Size in bytes of one MNIST image record: 28 x 28 pixels, one byte per pixel.
constexpr int g_kMnistImageByteSize = 28 * 28;
14
/// Reverses the byte order of the low 32 bits of @p x in place.
///
/// Every byte is masked explicitly, so the result does not rely on unsigned int being exactly 32 bits wide
/// (any bits above bit 31 are cleared).
///
/// @param x Value to byte-swap; overwritten with the swapped result.
void EndianSwap(unsigned int &x)
{
    x = ((x >> 24) & 0x000000FFu)
      | ((x >> 8)  & 0x0000FF00u)
      | ((x << 8)  & 0x00FF0000u)
      | ((x << 24) & 0xFF000000u);
}
19
20MnistDatabase::MnistDatabase(const std::string& binaryFileDirectory, bool scaleValues)
21 : m_BinaryDirectory(binaryFileDirectory)
22 , m_ScaleValues(scaleValues)
23{
24}
25
26std::unique_ptr<MnistDatabase::TTestCaseData> MnistDatabase::GetTestCaseData(unsigned int testCaseId)
27{
28 std::vector<unsigned char> I(g_kMnistImageByteSize);
29 unsigned int label = 0;
30
31 std::string imagePath = m_BinaryDirectory + std::string("t10k-images.idx3-ubyte");
32 std::string labelPath = m_BinaryDirectory + std::string("t10k-labels.idx1-ubyte");
33
34 std::ifstream imageStream(imagePath, std::ios::binary);
35 std::ifstream labelStream(labelPath, std::ios::binary);
36
37 if (!imageStream.is_open())
38 {
39 BOOST_LOG_TRIVIAL(fatal) << "Failed to load " << imagePath;
40 return nullptr;
41 }
42 if (!labelStream.is_open())
43 {
44 BOOST_LOG_TRIVIAL(fatal) << "Failed to load " << imagePath;
45 return nullptr;
46 }
47
48 unsigned int magic, num, row, col;
49
telsoa01c577f2c2018-08-31 09:22:23 +010050 // Checks the files have the correct header.
telsoa014fcda012018-03-09 14:13:49 +000051 imageStream.read(reinterpret_cast<char*>(&magic), sizeof(magic));
52 if (magic != 0x03080000)
53 {
54 BOOST_LOG_TRIVIAL(fatal) << "Failed to read " << imagePath;
55 return nullptr;
56 }
57 labelStream.read(reinterpret_cast<char*>(&magic), sizeof(magic));
58 if (magic != 0x01080000)
59 {
60 BOOST_LOG_TRIVIAL(fatal) << "Failed to read " << labelPath;
61 return nullptr;
62 }
63
telsoa01c577f2c2018-08-31 09:22:23 +010064 // Endian swaps the image and label file - all the integers in the files are stored in MSB first(high endian)
65 // format, hence it needs to flip the bytes of the header if using it on Intel processors or low-endian machines
telsoa014fcda012018-03-09 14:13:49 +000066 labelStream.read(reinterpret_cast<char*>(&num), sizeof(num));
67 imageStream.read(reinterpret_cast<char*>(&num), sizeof(num));
68 EndianSwap(num);
69 imageStream.read(reinterpret_cast<char*>(&row), sizeof(row));
70 EndianSwap(row);
71 imageStream.read(reinterpret_cast<char*>(&col), sizeof(col));
72 EndianSwap(col);
73
telsoa01c577f2c2018-08-31 09:22:23 +010074 // Reads image and label into memory.
telsoa014fcda012018-03-09 14:13:49 +000075 imageStream.seekg(testCaseId * g_kMnistImageByteSize, std::ios_base::cur);
76 imageStream.read(reinterpret_cast<char*>(&I[0]), g_kMnistImageByteSize);
77 labelStream.seekg(testCaseId, std::ios_base::cur);
78 labelStream.read(reinterpret_cast<char*>(&label), 1);
79
80 if (!imageStream.good())
81 {
82 BOOST_LOG_TRIVIAL(fatal) << "Failed to read " << imagePath;
83 return nullptr;
84 }
85 if (!labelStream.good())
86 {
87 BOOST_LOG_TRIVIAL(fatal) << "Failed to read " << labelPath;
88 return nullptr;
89 }
90
91 std::vector<float> inputImageData;
92 inputImageData.resize(g_kMnistImageByteSize);
93
94 for (unsigned int i = 0; i < col * row; ++i)
95 {
96 inputImageData[i] = boost::numeric_cast<float>(I[i]);
97
98 if(m_ScaleValues)
99 {
100 inputImageData[i] /= 255.0f;
101 }
102 }
103
104 return std::make_unique<TTestCaseData>(label, std::move(inputImageData));
105}