Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 1 | /* |
| 2 | Copyright 2017 Leon Merten Lohse |
| 3 | |
| 4 | Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | of this software and associated documentation files (the "Software"), to deal |
| 6 | in the Software without restriction, including without limitation the rights |
| 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | copies of the Software, and to permit persons to whom the Software is |
| 9 | furnished to do so, subject to the following conditions: |
| 10 | |
| 11 | The above copyright notice and this permission notice shall be included in |
| 12 | all copies or substantial portions of the Software. |
| 13 | |
| 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 20 | SOFTWARE. |
| 21 | */ |
| 22 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 23 | #ifndef NPY_H |
| 24 | #define NPY_H |
| 25 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 26 | #include <complex> |
| 27 | #include <fstream> |
| 28 | #include <string> |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 29 | #include <iostream> |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 30 | #include <sstream> |
| 31 | #include <cstdint> |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 32 | #include <cstring> |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 33 | #include <vector> |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 34 | #include <stdexcept> |
| 35 | #include <algorithm> |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 36 | #include <regex> |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 37 | #include <unordered_map> |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 38 | |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 39 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 40 | namespace npy { |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 41 | |
/* Compile-time test for byte order.
   If your compiler does not define any of these macros by default, you may
   want to define one of them manually.
   Defaults to little endian order.

   Checked, in order: the GCC/Clang __BYTE_ORDER__ macro, the glibc-style
   __BYTE_ORDER macro, and a list of target-specific big-endian markers.
   The misspelled *MIBSEB* names are kept next to the correct *MIPSEB* ones so
   that builds which defined them manually keep working. */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
     __BYTE_ORDER == __BIG_ENDIAN) || \
    defined(__BIG_ENDIAN__) || \
    defined(__ARMEB__) || \
    defined(__THUMBEB__) || \
    defined(__AARCH64EB__) || \
    defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) || \
    defined(_MIBSEB) || defined(__MIBSEB) || defined(__MIBSEB__)
const bool big_endian = true;
#else
const bool big_endian = false;
#endif
| 56 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 57 | |
// Magic prefix of the npy format and its length in bytes (terminator excluded).
constexpr char magic_string[] = "\x93NUMPY";
constexpr size_t magic_string_length = sizeof(magic_string) - 1;

// Byte-order markers used as the first character of a typestring.
constexpr char little_endian_char = '<';
constexpr char big_endian_char = '>';
constexpr char no_endian_char = '|';
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 64 | |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 65 | constexpr char host_endian_char = ( big_endian ? |
| 66 | big_endian_char : |
| 67 | little_endian_char ); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 68 | |
/* Integer type used for npy array dimensions and element counts. */
using ndarray_len_t = unsigned long int;
| 71 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 72 | inline void write_magic(std::ostream& ostream, unsigned char v_major=1, unsigned char v_minor=0) { |
| 73 | ostream.write(magic_string, magic_string_length); |
| 74 | ostream.put(v_major); |
| 75 | ostream.put(v_minor); |
| 76 | } |
| 77 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 78 | inline void read_magic(std::istream& istream, unsigned char& v_major, unsigned char& v_minor) { |
| 79 | char buf[magic_string_length+2]; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 80 | istream.read(buf, magic_string_length+2); |
| 81 | |
| 82 | if(!istream) { |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 83 | throw std::runtime_error("io error: failed reading file"); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 84 | } |
| 85 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 86 | if (0 != std::memcmp(buf, magic_string, magic_string_length)) |
| 87 | throw std::runtime_error("this file does not have a valid npy format."); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 88 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 89 | v_major = buf[magic_string_length]; |
| 90 | v_minor = buf[magic_string_length+1]; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 91 | } |
| 92 | |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 93 | // typestring magic |
| 94 | struct Typestring { |
| 95 | private: |
| 96 | char c_endian; |
| 97 | char c_type; |
| 98 | int len; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 99 | |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 100 | public: |
| 101 | inline std::string str() { |
| 102 | const size_t max_buflen = 16; |
| 103 | char buf[max_buflen]; |
| 104 | std::sprintf(buf, "%c%c%u", c_endian, c_type, len); |
| 105 | return std::string(buf); |
| 106 | } |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 107 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 108 | Typestring(const std::vector<float>& v) |
| 109 | :c_endian {host_endian_char}, c_type {'f'}, len {sizeof(float)} {} |
| 110 | Typestring(const std::vector<double>& v) |
| 111 | :c_endian {host_endian_char}, c_type {'f'}, len {sizeof(double)} {} |
| 112 | Typestring(const std::vector<long double>& v) |
| 113 | :c_endian {host_endian_char}, c_type {'f'}, len {sizeof(long double)} {} |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 114 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 115 | Typestring(const std::vector<char>& v) |
| 116 | :c_endian {no_endian_char}, c_type {'i'}, len {sizeof(char)} {} |
| 117 | Typestring(const std::vector<short>& v) |
| 118 | :c_endian {host_endian_char}, c_type {'i'}, len {sizeof(short)} {} |
| 119 | Typestring(const std::vector<int>& v) |
| 120 | :c_endian {host_endian_char}, c_type {'i'}, len {sizeof(int)} {} |
| 121 | Typestring(const std::vector<long>& v) |
| 122 | :c_endian {host_endian_char}, c_type {'i'}, len {sizeof(long)} {} |
| 123 | Typestring(const std::vector<long long>& v) :c_endian {host_endian_char}, c_type {'i'}, len {sizeof(long long)} {} |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 124 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 125 | Typestring(const std::vector<unsigned char>& v) |
| 126 | :c_endian {no_endian_char}, c_type {'u'}, len {sizeof(unsigned char)} {} |
| 127 | Typestring(const std::vector<unsigned short>& v) |
| 128 | :c_endian {host_endian_char}, c_type {'u'}, len {sizeof(unsigned short)} {} |
| 129 | Typestring(const std::vector<unsigned int>& v) |
| 130 | :c_endian {host_endian_char}, c_type {'u'}, len {sizeof(unsigned int)} {} |
| 131 | Typestring(const std::vector<unsigned long>& v) |
| 132 | :c_endian {host_endian_char}, c_type {'u'}, len {sizeof(unsigned long)} {} |
| 133 | Typestring(const std::vector<unsigned long long>& v) |
| 134 | :c_endian {host_endian_char}, c_type {'u'}, len {sizeof(unsigned long long)} {} |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 135 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 136 | Typestring(const std::vector<std::complex<float>>& v) |
| 137 | :c_endian {host_endian_char}, c_type {'c'}, len {sizeof(std::complex<float>)} {} |
| 138 | Typestring(const std::vector<std::complex<double>>& v) |
| 139 | :c_endian {host_endian_char}, c_type {'c'}, len {sizeof(std::complex<double>)} {} |
| 140 | Typestring(const std::vector<std::complex<long double>>& v) |
| 141 | :c_endian {host_endian_char}, c_type {'c'}, len {sizeof(std::complex<long double>)} {} |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 142 | }; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 143 | |
/**
  Validates a typestring that is still wrapped in single quotes, e.g. '<f4'.

  Nothing is extracted yet; the function only throws
  std::runtime_error("invalid typestring") when the text does not match.
*/
inline void parse_typestring( std::string typestring){
  const std::regex pattern ("'([<>|])([ifuc])(\\d+)'");
  std::smatch captures;

  const bool matched = std::regex_match(typestring, captures, pattern);

  // a successful match holds the whole match plus three capture groups
  if ( !matched || captures.size() != 4 ) {
    throw std::runtime_error("invalid typestring");
  }
}
| 154 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 155 | namespace pyparse { |
| 156 | |
/**
  Strips leading and trailing spaces and tabs from str.
  Returns an empty string when str contains nothing else.
*/
inline std::string trim(const std::string& str) {
  const char* const blanks = " \t";

  const std::string::size_type first = str.find_first_not_of(blanks);
  if (first == std::string::npos) {
    return std::string();
  }

  // a non-blank character exists, so this search cannot fail
  const std::string::size_type last = str.find_last_not_of(blanks);
  return std::string(str, first, last - first + 1);
}
| 171 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 172 | |
| 173 | inline std::string get_value_from_map(const std::string& mapstr) { |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 174 | size_t sep_pos = mapstr.find_first_of(":"); |
| 175 | if (sep_pos == std::string::npos) |
| 176 | return ""; |
| 177 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 178 | std::string tmp = mapstr.substr(sep_pos+1); |
| 179 | return trim(tmp); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 180 | } |
| 181 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 182 | /** |
| 183 | Parses the string representation of a Python dict |
| 184 | |
| 185 | The keys need to be known and may not appear anywhere else in the data. |
| 186 | */ |
| 187 | inline std::unordered_map<std::string, std::string> parse_dict(std::string in, std::vector<std::string>& keys) { |
| 188 | |
| 189 | std::unordered_map<std::string, std::string> map; |
| 190 | |
| 191 | if (keys.size() == 0) |
| 192 | return map; |
| 193 | |
| 194 | in = trim(in); |
| 195 | |
| 196 | // unwrap dictionary |
| 197 | if ((in.front() == '{') && (in.back() == '}')) |
| 198 | in = in.substr(1, in.length()-2); |
| 199 | else |
| 200 | throw std::runtime_error("Not a Python dictionary."); |
| 201 | |
| 202 | std::vector<std::pair<size_t, std::string>> positions; |
| 203 | |
| 204 | for (auto const& value : keys) { |
| 205 | size_t pos = in.find( "'" + value + "'" ); |
| 206 | |
| 207 | if (pos == std::string::npos) |
| 208 | throw std::runtime_error("Missing '"+value+"' key."); |
| 209 | |
| 210 | std::pair<size_t, std::string> position_pair { pos, value }; |
| 211 | positions.push_back(position_pair); |
| 212 | } |
| 213 | |
| 214 | // sort by position in dict |
| 215 | std::sort(positions.begin(), positions.end() ); |
| 216 | |
| 217 | for(size_t i = 0; i < positions.size(); ++i) { |
| 218 | std::string raw_value; |
| 219 | size_t begin { positions[i].first }; |
| 220 | size_t end { std::string::npos }; |
| 221 | |
| 222 | std::string key = positions[i].second; |
| 223 | |
| 224 | if ( i+1 < positions.size() ) |
| 225 | end = positions[i+1].first; |
| 226 | |
| 227 | raw_value = in.substr(begin, end-begin); |
| 228 | |
| 229 | raw_value = trim(raw_value); |
| 230 | |
| 231 | if (raw_value.back() == ',') |
| 232 | raw_value.pop_back(); |
| 233 | |
| 234 | map[key] = get_value_from_map(raw_value); |
| 235 | } |
| 236 | |
| 237 | return map; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 238 | } |
| 239 | |
/**
  Parses the string representation of a Python boolean

  @param in  exactly "True" or "False" (no surrounding whitespace)
  @return the corresponding C++ bool
  @throws std::runtime_error for any other input
*/
inline bool parse_bool(const std::string& in) {
  if (in == "True")
    return true;
  if (in == "False")
    return false;

  throw std::runtime_error("Invalid python boolean.");
}
| 251 | |
/**
  Parses the string representation of a Python str

  @param in  text wrapped in single quotes, e.g. '<f4'
  @return the text between the quotes
  @throws std::runtime_error if in is not wrapped in a pair of single quotes
*/
inline std::string parse_str(const std::string& in) {
  // two characters are the minimum (opening and closing quote); this also keeps
  // front()/back() from being called on an empty string
  if (in.length() >= 2 && in.front() == '\'' && in.back() == '\'')
    return in.substr(1, in.length()-2);

  throw std::runtime_error("Invalid python string.");
}
| 261 | |
| 262 | /** |
| 263 | Parses the string represenatation of a Python tuple into a vector of its items |
| 264 | */ |
| 265 | inline std::vector<std::string> parse_tuple(std::string in) { |
| 266 | std::vector<std::string> v; |
| 267 | const char seperator = ','; |
| 268 | |
| 269 | in = trim(in); |
| 270 | |
| 271 | if ((in.front() == '(') && (in.back() == ')')) |
| 272 | in = in.substr(1, in.length()-2); |
| 273 | else |
| 274 | throw std::runtime_error("Invalid Python tuple."); |
| 275 | |
| 276 | std::istringstream iss(in); |
| 277 | |
| 278 | for (std::string token; std::getline(iss, token, seperator);) { |
| 279 | v.push_back(token); |
| 280 | } |
| 281 | |
| 282 | return v; |
| 283 | } |
| 284 | |
/**
  Writes the items of v as the string representation of a Python tuple.

  Examples: {} gives "()", {3} gives "(3,)", {3, 4} gives "(3, 4)".
  Items are formatted with operator<< on a std::ostringstream.
*/
template <typename T>
inline std::string write_tuple(const std::vector<T>& v) {
  // the empty tuple is spelled "()"; an empty string here would leave the
  // enclosing dictionary without a value
  if (v.empty())
    return "()";

  std::ostringstream ss;

  ss << "(";
  for (size_t i = 0; i < v.size(); ++i) {
    if (i != 0)
      ss << ", ";
    ss << v[i];
  }
  // a one-element tuple needs a trailing comma to be a tuple in Python
  ss << (v.size() == 1 ? ",)" : ")");

  return ss.str();
}
| 305 | |
/** Returns the Python literal for b: "True" or "False". */
inline std::string write_boolean(bool b) {
  return std::string(b ? "True" : "False");
}
| 312 | |
| 313 | } // namespace pyparse |
| 314 | |
| 315 | |
| 316 | inline void parse_header(std::string header, std::string& descr, bool& fortran_order, std::vector<ndarray_len_t>& shape) { |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 317 | /* |
| 318 | The first 6 bytes are a magic string: exactly "x93NUMPY". |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 319 | The next 1 byte is an unsigned byte: the major version number of the file format, e.g. x01. |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 320 | The next 1 byte is an unsigned byte: the minor version number of the file format, e.g. x00. Note: the version of the file format is not tied to the version of the numpy package. |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 321 | The next 2 bytes form a little-endian unsigned short int: the length of the header data HEADER_LEN. |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 322 | The next HEADER_LEN bytes form the header data describing the array's format. It is an ASCII string which contains a Python literal expression of a dictionary. It is terminated by a newline ('n') and padded with spaces ('x20') to make the total length of the magic string + 4 + HEADER_LEN be evenly divisible by 16 for alignment purposes. |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 323 | The dictionary contains three keys: |
| 324 | |
| 325 | "descr" : dtype.descr |
| 326 | An object that can be passed as an argument to the numpy.dtype() constructor to create the array's dtype. |
| 327 | "fortran_order" : bool |
| 328 | Whether the array data is Fortran-contiguous or not. Since Fortran-contiguous arrays are a common form of non-C-contiguity, we allow them to be written directly to disk for efficiency. |
| 329 | "shape" : tuple of int |
| 330 | The shape of the array. |
| 331 | For repeatability and readability, this dictionary is formatted using pprint.pformat() so the keys are in alphabetic order. |
| 332 | */ |
| 333 | |
| 334 | // remove trailing newline |
| 335 | if (header.back() != '\n') |
| 336 | throw std::runtime_error("invalid header"); |
| 337 | header.pop_back(); |
| 338 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 339 | // parse the dictionary |
| 340 | std::vector<std::string> keys { "descr", "fortran_order", "shape" }; |
| 341 | auto dict_map = npy::pyparse::parse_dict(header, keys); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 342 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 343 | if (dict_map.size() == 0) |
| 344 | throw std::runtime_error("invalid dictionary in header"); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 345 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 346 | std::string descr_s = dict_map["descr"]; |
| 347 | std::string fortran_s = dict_map["fortran_order"]; |
| 348 | std::string shape_s = dict_map["shape"]; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 349 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 350 | // TODO: extract info from typestring |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 351 | parse_typestring(descr_s); |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 352 | // remove |
| 353 | descr = npy::pyparse::parse_str(descr_s); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 354 | |
| 355 | // convert literal Python bool to C++ bool |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 356 | fortran_order = npy::pyparse::parse_bool(fortran_s); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 357 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 358 | // parse the shape tuple |
| 359 | auto shape_v = npy::pyparse::parse_tuple(shape_s); |
| 360 | if (shape_v.size() == 0) |
| 361 | throw std::runtime_error("invalid shape tuple in header"); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 362 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 363 | for ( auto item : shape_v ) { |
| 364 | std::stringstream stream(item); |
| 365 | unsigned long value; |
| 366 | stream >> value; |
| 367 | ndarray_len_t dim = static_cast<ndarray_len_t>(value); |
| 368 | shape.push_back(dim); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 369 | } |
| 370 | } |
| 371 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 372 | |
| 373 | inline std::string write_header_dict(const std::string& descr, bool fortran_order, const std::vector<ndarray_len_t>& shape) { |
| 374 | std::string s_fortran_order = npy::pyparse::write_boolean(fortran_order); |
| 375 | std::string shape_s = npy::pyparse::write_tuple(shape); |
| 376 | |
| 377 | return "{'descr': '" + descr + "', 'fortran_order': " + s_fortran_order + ", 'shape': " + shape_s + ", }"; |
| 378 | } |
| 379 | |
| 380 | inline void write_header(std::ostream& out, const std::string& descr, bool fortran_order, const std::vector<ndarray_len_t>& shape_v) |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 381 | { |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 382 | std::string header_dict = write_header_dict(descr, fortran_order, shape_v); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 383 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 384 | size_t length = magic_string_length + 2 + 2 + header_dict.length() + 1; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 385 | |
| 386 | unsigned char version[2] = {1, 0}; |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 387 | if (length >= 255*255) { |
| 388 | length = magic_string_length + 2 + 4 + header_dict.length() + 1; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 389 | version[0] = 2; |
| 390 | version[1] = 0; |
| 391 | } |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 392 | size_t padding_len = 16 - length % 16; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 393 | std::string padding (padding_len, ' '); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 394 | |
| 395 | // write magic |
| 396 | write_magic(out, version[0], version[1]); |
| 397 | |
| 398 | // write header length |
| 399 | if (version[0] == 1 && version[1] == 0) { |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 400 | char header_len_le16[2]; |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 401 | uint16_t header_len = header_dict.length() + padding.length() + 1; |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 402 | |
| 403 | header_len_le16[0] = (header_len >> 0) & 0xff; |
| 404 | header_len_le16[1] = (header_len >> 8) & 0xff; |
| 405 | out.write(reinterpret_cast<char *>(header_len_le16), 2); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 406 | }else{ |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 407 | char header_len_le32[4]; |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 408 | uint32_t header_len = header_dict.length() + padding.length() + 1; |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 409 | |
| 410 | header_len_le32[0] = (header_len >> 0) & 0xff; |
| 411 | header_len_le32[1] = (header_len >> 8) & 0xff; |
| 412 | header_len_le32[2] = (header_len >> 16) & 0xff; |
| 413 | header_len_le32[3] = (header_len >> 24) & 0xff; |
| 414 | out.write(reinterpret_cast<char *>(header_len_le32), 4); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 415 | } |
| 416 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 417 | out << header_dict << padding << '\n'; |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 418 | } |
| 419 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 420 | inline std::string read_header(std::istream& istream) { |
| 421 | // check magic bytes an version number |
| 422 | unsigned char v_major, v_minor; |
| 423 | read_magic(istream, v_major, v_minor); |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 424 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 425 | uint32_t header_length; |
| 426 | if(v_major == 1 && v_minor == 0){ |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 427 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 428 | char header_len_le16[2]; |
| 429 | istream.read(header_len_le16, 2); |
| 430 | header_length = (header_len_le16[0] << 0) | (header_len_le16[1] << 8); |
| 431 | |
| 432 | if((magic_string_length + 2 + 2 + header_length) % 16 != 0) { |
| 433 | // TODO: display warning |
| 434 | } |
| 435 | }else if(v_major == 2 && v_minor == 0) { |
| 436 | char header_len_le32[4]; |
| 437 | istream.read(header_len_le32, 4); |
| 438 | |
| 439 | header_length = (header_len_le32[0] << 0) | (header_len_le32[1] << 8) |
| 440 | | (header_len_le32[2] << 16) | (header_len_le32[3] << 24); |
| 441 | |
| 442 | if((magic_string_length + 2 + 4 + header_length) % 16 != 0) { |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 443 | // TODO: display warning |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 444 | } |
| 445 | }else{ |
| 446 | throw std::runtime_error("unsupported file format version"); |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 447 | } |
| 448 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 449 | auto buf_v = std::vector<char>(); |
| 450 | buf_v.reserve(header_length); |
| 451 | istream.read(buf_v.data(), header_length); |
| 452 | std::string header(buf_v.data(), header_length); |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 453 | |
| 454 | return header; |
| 455 | } |
| 456 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 457 | inline ndarray_len_t comp_size(const std::vector<ndarray_len_t>& shape) { |
| 458 | ndarray_len_t size = 1; |
| 459 | for (ndarray_len_t i : shape ) |
| 460 | size *= i; |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 461 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 462 | return size; |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 463 | } |
| 464 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 465 | template<typename Scalar> |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 466 | inline void SaveArrayAsNumpy( const std::string& filename, bool fortran_order, unsigned int n_dims, const unsigned long shape[], const std::vector<Scalar>& data) |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 467 | { |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 468 | Typestring typestring_o(data); |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 469 | std::string typestring = typestring_o.str(); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 470 | |
| 471 | std::ofstream stream( filename, std::ofstream::binary); |
| 472 | if(!stream) { |
| 473 | throw std::runtime_error("io error: failed to open a file."); |
| 474 | } |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 475 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 476 | std::vector<ndarray_len_t> shape_v(shape, shape+n_dims); |
| 477 | write_header(stream, typestring, fortran_order, shape_v); |
| 478 | |
| 479 | auto size = static_cast<size_t>(comp_size(shape_v)); |
| 480 | |
| 481 | stream.write(reinterpret_cast<const char*>(data.data()), sizeof(Scalar) * size); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 482 | } |
| 483 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 484 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 485 | template<typename Scalar> |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 486 | inline void LoadArrayFromNumpy(const std::string& filename, std::vector<unsigned long>& shape, std::vector<Scalar>& data) |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 487 | { |
| 488 | std::ifstream stream(filename, std::ifstream::binary); |
| 489 | if(!stream) { |
| 490 | throw std::runtime_error("io error: failed to open a file."); |
| 491 | } |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 492 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 493 | std::string header = read_header(stream); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 494 | |
| 495 | // parse header |
| 496 | bool fortran_order; |
| 497 | std::string typestr; |
| 498 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 499 | parse_header(header, typestr, fortran_order, shape); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 500 | |
| 501 | // check if the typestring matches the given one |
Anthony Barbier | 3c5b4ff | 2017-10-12 13:20:52 +0100 | [diff] [blame] | 502 | Typestring typestring_o {data}; |
| 503 | std::string expect_typestr = typestring_o.str(); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 504 | if (typestr != expect_typestr) { |
| 505 | throw std::runtime_error("formatting error: typestrings not matching"); |
| 506 | } |
| 507 | |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 508 | |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 509 | // compute the data size based on the shape |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 510 | auto size = static_cast<size_t>(comp_size(shape)); |
| 511 | data.resize(size); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 512 | |
| 513 | // read the data |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 514 | stream.read(reinterpret_cast<char*>(data.data()), sizeof(Scalar)*size); |
Anthony Barbier | a3adb3a | 2017-09-13 16:03:39 +0100 | [diff] [blame] | 515 | } |
| 516 | |
| 517 | } // namespace npy |
Anthony Barbier | 87f21cd | 2017-11-10 16:27:32 +0000 | [diff] [blame] | 518 | |
| 519 | #endif // NPY_H |