blob: 844004bf718d8659b91be4a75696c08277d19f01 [file] [log] [blame]
Colm Donelan0dfb2652023-06-22 10:19:17 +01001//
2// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "FileComparisonExecutor.hpp"
7#include <NetworkExecutionUtils/NetworkExecutionUtils.hpp>
8#include <algorithm>
Colm Donelan94149362023-07-03 16:19:44 +01009#include <ghc/filesystem.hpp>
Colm Donelan0dfb2652023-06-22 10:19:17 +010010#include <iterator>
11
12using namespace armnn;
13
14/**
15 * Given a buffer in the expected format. Extract from it the tensor name, tensor type as strings and return an
16 * index pointing to the start of the data section.
17 *
18 * @param buffer data to be parsed.
19 * @param tensorName the name of the tensor extracted from the header.
20 * @param tensorType the type of the tensor extracted from the header.
21 * @return index pointing to the start of the data in the buffer.
22 */
23unsigned int ExtractHeader(const std::vector<char>& buffer, std::string& tensorName, DataType& tensorType)
24{
25 auto isColon = [](char c) { return c == ':'; };
26 auto isComma = [](char c) { return c == ','; };
27
28 // Find the "," separator marks the end of the tensor name.
29 auto firstComma = std::find_if(buffer.begin(), buffer.end(), isComma);
30 if (firstComma == buffer.end())
31 {
32 throw ParseException("Unable to read tensor name from file.");
33 }
34 tensorName.assign(buffer.begin(), firstComma);
35
36 // The next colon marks the end of the data type string.
37 auto endOfHeader = std::find_if(firstComma, buffer.end(), isColon);
38 if (firstComma == buffer.end())
39 {
40 throw ParseException("Unable to read tensor type from file.");
41 }
42 std::string type(++firstComma, endOfHeader);
43 // Remove any leading or trailing whitespace.
44 type.erase(remove_if(type.begin(), type.end(), isspace), type.end());
45 if (type == "Float16")
46 {
47 tensorType = DataType::Float16;
48 }
49 else if (type == "Float32")
50 {
51 tensorType = DataType::Float32;
52 }
53 else if (type == "QAsymmU8")
54 {
55 tensorType = DataType::QAsymmU8;
56 }
57 else if (type == "Signed32")
58 {
59 tensorType = DataType::Signed32;
60 }
61 else if (type == "Boolean")
62 {
63 tensorType = DataType::Boolean;
64 }
65 else if (type == "QSymmS16")
66 {
67 tensorType = DataType::QSymmS16;
68 }
69 else if (type == "QSymmS8")
70 {
71 tensorType = DataType::QSymmS8;
72 }
73 else if (type == "QAsymmS8")
74 {
75 tensorType = DataType::QAsymmS8;
76 }
77 else if (type == "BFloat16")
78 {
79 tensorType = DataType::BFloat16;
80 }
81 else if (type == "Signed64")
82 {
83 tensorType = DataType::Signed64;
84 }
85 else
86 {
87 throw ParseException("Invalid data type in header.");
88 }
89 // Remember to move the iterator past the colon.
Colm Donelan5ea6e3f2023-12-08 15:49:54 +000090 endOfHeader++;
91 // Breaking this into two parts to avoid an awkward gcc compiler problem.
92 auto dataStart = endOfHeader - buffer.begin();
93 return static_cast<unsigned int>(dataStart);
Colm Donelan0dfb2652023-06-22 10:19:17 +010094}
95
96/**
97 * Extract the data from the file and return as a typed vector of elements.
98 *
99 * @param buffer data to be parsed.
100 * @param dataStart Index into the vector where the tensor data starts.
101 * @param tensorType the type of the tensor extracted from the header.
102 */
103template <typename T>
104void ReadData(const std::vector<char>& buffer,
105 const unsigned int dataStart,
106 const DataType& tensorType,
107 std::vector<T>& results)
108{
109 unsigned int index = dataStart;
110 while (index < buffer.size())
111 {
112 std::string elementString;
113 // Extract into a string until the next space.
114 while (index < buffer.size() && buffer[index] != ' ')
115 {
116 elementString.push_back(buffer[index]);
117 index++;
118 }
119 if (!elementString.empty())
120 {
121 switch (tensorType)
122 {
123 case DataType::Float32: {
Colm Donelan5ea6e3f2023-12-08 15:49:54 +0000124 results.push_back(static_cast<T>(std::stof(elementString)));
Colm Donelan0dfb2652023-06-22 10:19:17 +0100125 break;
126 }
127
128 case DataType::Signed32: {
Colm Donelan5ea6e3f2023-12-08 15:49:54 +0000129 results.push_back(static_cast<T>(std::stoi(elementString)));
Colm Donelan0dfb2652023-06-22 10:19:17 +0100130 break;
131 }
132 case DataType::QSymmS8:
133 case DataType::QAsymmS8: {
Colm Donelan5ea6e3f2023-12-08 15:49:54 +0000134 results.push_back(static_cast<T>(elementString[0]));
Colm Donelan0dfb2652023-06-22 10:19:17 +0100135 break;
136 }
137 case DataType::QAsymmU8: {
Colm Donelan5ea6e3f2023-12-08 15:49:54 +0000138 results.push_back(static_cast<T>(elementString[0]));
Colm Donelan0dfb2652023-06-22 10:19:17 +0100139 break;
140 }
141 case DataType::Float16:
142 case DataType::QSymmS16:
143 case DataType::BFloat16:
144 case DataType::Boolean:
145 case DataType::Signed64:
146 default: {
147 LogAndThrow("Unsupported DataType");
148 }
149 }
150 // Finally, skip the space we know is there.
151 index++;
152 }
153 else
154 {
155 if (index < buffer.size())
156 {
157 index++;
158 }
159 }
160 }
161}
162
163/**
164 * Open the given file and read the data out of it to construct a Tensor. This could throw FileNotFoundException
165 * or InvalidArgumentException
166 *
167 * @param fileName the file to be read.
168 * @return a populated tensor.
169 */
170Tensor ReadTensorFromFile(const std::string fileName)
171{
Colm Donelan94149362023-07-03 16:19:44 +0100172 if (!ghc::filesystem::exists(fileName))
Colm Donelan0dfb2652023-06-22 10:19:17 +0100173 {
174 throw FileNotFoundException("The file \"" + fileName + "\" could not be found.");
175 }
176 // The format we are reading in is based on NetworkExecutionUtils::WriteToFile. This could potentially
177 // be an enormous tensor. We'll limit what we can read in to 1Mb.
178 std::uintmax_t maxFileSize = 1048576;
Colm Donelan94149362023-07-03 16:19:44 +0100179 std::uintmax_t fileSize = ghc::filesystem::file_size(fileName);
Colm Donelan0dfb2652023-06-22 10:19:17 +0100180 if (fileSize > maxFileSize)
181 {
182 throw InvalidArgumentException("The file \"" + fileName + "\" exceeds max size of 1 Mb.");
183 }
184
185 // We'll read the entire file into one buffer.
186 std::ifstream file(fileName, std::ios::binary);
187 std::vector<char> buffer(fileSize);
Colm Donelan5ea6e3f2023-12-08 15:49:54 +0000188 if (file.read(buffer.data(), static_cast<std::streamsize>(fileSize)))
Colm Donelan0dfb2652023-06-22 10:19:17 +0100189 {
190 std::string tensorName;
191 DataType tensorType;
192 unsigned int tensorDataStart = ExtractHeader(buffer, tensorName, tensorType);
193 switch (tensorType)
194 {
195 case DataType::Float32: {
196 std::vector<float> floatVector;
197 ReadData(buffer, tensorDataStart, tensorType, floatVector);
198 TensorInfo info({ static_cast<unsigned int>(floatVector.size()), 1, 1, 1 }, DataType::Float32);
199 float* floats = new float[floatVector.size()];
200 memcpy(floats, floatVector.data(), (floatVector.size() * sizeof(float)));
201 return Tensor(info, floats);
202 }
203 case DataType::Signed32: {
204 std::vector<int> intVector;
205 ReadData(buffer, tensorDataStart, tensorType, intVector);
206 TensorInfo info({ static_cast<unsigned int>(intVector.size()), 1, 1, 1 }, DataType::Signed32);
207 int* ints = new int[intVector.size()];
208 memcpy(ints, intVector.data(), (intVector.size() * sizeof(float)));
209 return Tensor(info, ints);
210 }
211 case DataType::QSymmS8: {
212 std::vector<int8_t> intVector;
213 ReadData(buffer, tensorDataStart, tensorType, intVector);
214 TensorInfo info({ static_cast<unsigned int>(intVector.size()), 1, 1, 1 }, DataType::QSymmS8);
215 int8_t* ints = new int8_t[intVector.size()];
216 memcpy(ints, intVector.data(), (intVector.size() * sizeof(float)));
217 return Tensor(info, ints);
218 }
219 case DataType::QAsymmS8: {
220 std::vector<int8_t> intVector;
221 ReadData(buffer, tensorDataStart, tensorType, intVector);
222 TensorInfo info({ static_cast<unsigned int>(intVector.size()), 1, 1, 1 }, DataType::QAsymmS8);
223 int8_t* ints = new int8_t[intVector.size()];
224 memcpy(ints, intVector.data(), (intVector.size() * sizeof(float)));
225 return Tensor(info, ints);
226 }
227 case DataType::QAsymmU8: {
228 std::vector<uint8_t> intVector;
229 ReadData(buffer, tensorDataStart, tensorType, intVector);
230 TensorInfo info({ static_cast<unsigned int>(intVector.size()), 1, 1, 1 }, DataType::QAsymmU8);
231 uint8_t* ints = new uint8_t[intVector.size()];
232 memcpy(ints, intVector.data(), (intVector.size() * sizeof(float)));
233 return Tensor(info, ints);
234 }
235 default:
236 throw InvalidArgumentException("The tensor data could not be read from \"" + fileName + "\"");
237 }
238 }
239 else
240 {
241 throw ParseException("Filed to read the contents of \"" + fileName + "\"");
242 }
243
244 Tensor result;
245 return result;
246}
247
248FileComparisonExecutor::FileComparisonExecutor(const ExecuteNetworkParams& params)
249 : m_Params(params)
250{}
251
252std::vector<const void*> FileComparisonExecutor::Execute()
253{
254 std::string filesToCompare = this->m_Params.m_ComparisonFile;
255 if (filesToCompare.empty())
256 {
257 throw InvalidArgumentException("The file(s) to compare was not set.");
258 }
259 // filesToCompare is one or more files containing output tensors. Iterate and read in the tensors.
260 // We'll assume the string follows the same comma seperated format as write-outputs-to-file.
261 std::stringstream ss(filesToCompare);
262 std::vector<std::string> fileNames;
263 std::string errorString;
264 while (ss.good())
265 {
266 std::string substr;
267 getline(ss, substr, ',');
268 // Check the file exist.
Colm Donelan94149362023-07-03 16:19:44 +0100269 if (!ghc::filesystem::exists(substr))
Colm Donelan0dfb2652023-06-22 10:19:17 +0100270 {
271 errorString += substr + " ";
272 }
273 else
274 {
275 fileNames.push_back(substr);
276 }
277 }
278 if (!errorString.empty())
279 {
280 throw FileNotFoundException("The following file(s) to compare could not be found: " + errorString);
281 }
282 // Read in the tensors into m_OutputTensorsVec
283 OutputTensors outputs;
284 std::vector<const void*> results;
285 for (auto file : fileNames)
286 {
287 Tensor t = ReadTensorFromFile(file);
288 outputs.push_back({ 0, Tensor(t.GetInfo(), t.GetMemoryArea()) });
289 results.push_back(t.GetMemoryArea());
290 }
291 m_OutputTensorsVec.push_back(outputs);
292 return results;
293}
294
295void FileComparisonExecutor::PrintNetworkInfo()
296{
297 std::cout << "Not implemented in this class." << std::endl;
298}
299
300void FileComparisonExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
301{
302 unsigned int index = 0;
303 std::string typeString;
304 for (const auto& outputTensors : m_OutputTensorsVec)
305 {
306 for (const auto& outputTensor : outputTensors)
307 {
308 size_t size = outputTensor.second.GetNumBytes();
309 double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
310 std::cout << "Byte level root mean square error: " << result << "\n";
311 }
312 }
313}
314
315FileComparisonExecutor::~FileComparisonExecutor()
316{
317 // If there are tensors defined in m_OutputTensorsVec we need to clean up their memory usage.
318 for (OutputTensors opTensor : m_OutputTensorsVec)
319 {
320 for (std::pair<LayerBindingId, class Tensor> pair : opTensor)
321 {
322 Tensor t = pair.second;
323 // Based on the tensor type and size recover the memory.
324 switch (t.GetDataType())
325 {
326 case DataType::Float32:
327 delete[] static_cast<float*>(t.GetMemoryArea());
328 break;
329 case DataType::Signed32:
330 delete[] static_cast<int*>(t.GetMemoryArea());
331 break;
332 case DataType::QSymmS8:
333 delete[] static_cast<int8_t*>(t.GetMemoryArea());
334 break;
335 case DataType::QAsymmS8:
336 delete[] static_cast<int8_t*>(t.GetMemoryArea());
337 break;
338 case DataType::QAsymmU8:
339 delete[] static_cast<uint8_t*>(t.GetMemoryArea());
340 break;
341 default:
342 std::cout << "The data type wasn't created in ReadTensorFromFile" << std::endl;
343 }
344 }
345 }
346
347}