telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 1 | // half - IEEE 754-based half-precision floating point library.
|
| 2 | //
|
| 3 | // Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net>
|
| 4 | //
|
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
| 6 | // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
| 7 | // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
|
| 8 | // Software is furnished to do so, subject to the following conditions:
|
| 9 | //
|
| 10 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 11 | //
|
| 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
| 13 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
| 14 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
| 15 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 16 |
|
| 17 | // Version 1.12.0
|
| 18 |
|
| 19 | /// \file
|
| 20 | /// Main header file for half precision functionality.
|
| 21 |
|
| 22 | #ifndef HALF_HALF_HPP
|
| 23 | #define HALF_HALF_HPP
|
| 24 |
|
/// Combined gcc version number.
/// Encodes the gcc major and minor version as `major*100+minor` so that it can be compared in `#if` expressions. The
/// macro is undefined again once the feature checks are finished.
#define HALF_GNUC_VERSION (__GNUC__*100+__GNUC_MINOR__)

//check C++11 language features
// Every HALF_ENABLE_CPP11_* switch is only defined here when it is not already defined, so a definition made before
// including this header takes precedence over the automatic detection.
#if defined(__clang__)										//clang
	// clang: each language feature is queried individually through __has_feature
	#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
	#endif
	#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
		#define HALF_ENABLE_CPP11_CONSTEXPR 1
	#endif
	#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
		#define HALF_ENABLE_CPP11_NOEXCEPT 1
	#endif
	#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
		#define HALF_ENABLE_CPP11_USER_LITERALS 1
	#endif
	#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG)
		#define HALF_ENABLE_CPP11_LONG_LONG 1
	#endif
// Disabled Intel C++ detection; the version thresholds are marked as unverified ("????????") by the author.
/*#elif defined(__INTEL_COMPILER)								//Intel C++
	#if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)		????????
		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
	#endif
	#if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)			????????
		#define HALF_ENABLE_CPP11_CONSTEXPR 1
	#endif
	#if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)			????????
		#define HALF_ENABLE_CPP11_NOEXCEPT 1
	#endif
	#if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG)			????????
		#define HALF_ENABLE_CPP11_LONG_LONG 1
	#endif*/
#elif defined(__GNUC__)										//gcc
	// gcc: features depend on the compiler version and are only enabled when compiling in C++11 mode (or later)
	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
		#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
			#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
		#endif
		#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
			#define HALF_ENABLE_CPP11_CONSTEXPR 1
		#endif
		#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
			#define HALF_ENABLE_CPP11_NOEXCEPT 1
		#endif
		#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
			#define HALF_ENABLE_CPP11_USER_LITERALS 1
		#endif
		#if !defined(HALF_ENABLE_CPP11_LONG_LONG)
			#define HALF_ENABLE_CPP11_LONG_LONG 1
		#endif
	#endif
#elif defined(_MSC_VER)										//Visual C++
	// Visual C++: features depend on the value of _MSC_VER only
	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
		#define HALF_ENABLE_CPP11_CONSTEXPR 1
	#endif
	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
		#define HALF_ENABLE_CPP11_NOEXCEPT 1
	#endif
	#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
		#define HALF_ENABLE_CPP11_USER_LITERALS 1
	#endif
	#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
		#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
	#endif
	#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
		#define HALF_ENABLE_CPP11_LONG_LONG 1
	#endif
	// Save the warning state and silence three warnings for the rest of the header.
	// NOTE(review): HALF_POP_WARNINGS presumably triggers a matching `#pragma warning(pop)` at the end of the header,
	// which is not visible in this part of the file - confirm.
	#define HALF_POP_WARNINGS 1
	#pragma warning(push)
	#pragma warning(disable : 4099 4127 4146)	//struct vs class, constant in if, negative unsigned
#endif
|
| 96 |
|
//check C++11 library features
// As with the language features, every switch is only defined when it has not been defined before.
// NOTE(review): <utility> is presumably included first so that the library identification macros tested below
// (_LIBCPP_VERSION, __GLIBCXX__, _CPPLIB_VER) are available - confirm.
#include <utility>
#if defined(_LIBCPP_VERSION)								//libc++
	// libc++: all library features are enabled together when compiling in C++11 mode (or later)
	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
		#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
			#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
		#endif
		#ifndef HALF_ENABLE_CPP11_CSTDINT
			#define HALF_ENABLE_CPP11_CSTDINT 1
		#endif
		#ifndef HALF_ENABLE_CPP11_CMATH
			#define HALF_ENABLE_CPP11_CMATH 1
		#endif
		#ifndef HALF_ENABLE_CPP11_HASH
			#define HALF_ENABLE_CPP11_HASH 1
		#endif
	#endif
#elif defined(__GLIBCXX__)									//libstdc++
	#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
		#ifdef __clang__
			// clang on top of libstdc++: decide by the library release date stored in __GLIBCXX__
			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
				#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
			#endif
			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT)
				#define HALF_ENABLE_CPP11_CSTDINT 1
			#endif
			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH)
				#define HALF_ENABLE_CPP11_CMATH 1
			#endif
			#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH)
				#define HALF_ENABLE_CPP11_HASH 1
			#endif
		#else
			// any other compiler is treated as gcc here: decide by the compiler version
			// NOTE(review): unlike the clang branch, HALF_ENABLE_CPP11_TYPE_TRAITS is never enabled automatically in
			// this branch - confirm whether that is intentional.
			#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT)
				#define HALF_ENABLE_CPP11_CSTDINT 1
			#endif
			#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH)
				#define HALF_ENABLE_CPP11_CMATH 1
			#endif
			#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH)
				#define HALF_ENABLE_CPP11_HASH 1
			#endif
		#endif
	#endif
#elif defined(_CPPLIB_VER)									//Dinkumware/Visual C++
	// Dinkumware: <type_traits>, <cstdint> and hashing from version 520 on, the C++11 <cmath> functions from 610 on
	#if _CPPLIB_VER >= 520
		#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
			#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
		#endif
		#ifndef HALF_ENABLE_CPP11_CSTDINT
			#define HALF_ENABLE_CPP11_CSTDINT 1
		#endif
		#ifndef HALF_ENABLE_CPP11_HASH
			#define HALF_ENABLE_CPP11_HASH 1
		#endif
	#endif
	#if _CPPLIB_VER >= 610
		#ifndef HALF_ENABLE_CPP11_CMATH
			#define HALF_ENABLE_CPP11_CMATH 1
		#endif
	#endif
#endif
// the helper macro is only needed for the checks above
#undef HALF_GNUC_VERSION
|
| 160 |
|
//support constexpr
// HALF_CONSTEXPR marks functions and expands to nothing without constexpr support.
// HALF_CONSTEXPR_CONST marks constants and degrades to plain `const` instead.
#if HALF_ENABLE_CPP11_CONSTEXPR
	#define HALF_CONSTEXPR			constexpr
	#define HALF_CONSTEXPR_CONST	constexpr
#else
	#define HALF_CONSTEXPR
	#define HALF_CONSTEXPR_CONST	const
#endif

//support noexcept
// HALF_NOEXCEPT expands to nothing without noexcept support, whereas HALF_NOTHROW falls back to the dynamic
// exception specification `throw()`.
#if HALF_ENABLE_CPP11_NOEXCEPT
	#define HALF_NOEXCEPT	noexcept
	#define HALF_NOTHROW	noexcept
#else
	#define HALF_NOEXCEPT
	#define HALF_NOTHROW	throw()
#endif
|
| 178 |
|
| 179 | #include <algorithm>
|
| 180 | #include <iostream>
|
| 181 | #include <limits>
|
| 182 | #include <climits>
|
| 183 | #include <cmath>
|
| 184 | #include <cstring>
|
| 185 | #include <cstdlib>
|
| 186 | #if HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 187 | #include <type_traits>
|
| 188 | #endif
|
| 189 | #if HALF_ENABLE_CPP11_CSTDINT
|
| 190 | #include <cstdint>
|
| 191 | #endif
|
| 192 | #if HALF_ENABLE_CPP11_HASH
|
| 193 | #include <functional>
|
| 194 | #endif
|
| 195 |
|
| 196 |
|
/// Default rounding mode.
/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and `float`s as well as
/// for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including half.hpp) to one
/// of the standard rounding modes using their respective constants or the equivalent values of `std::float_round_style`:
///
/// `std::float_round_style`         | value | rounding
/// ---------------------------------|-------|-------------------------
/// `std::round_indeterminate`       | -1    | fastest (default)
/// `std::round_toward_zero`         | 0     | toward zero
/// `std::round_to_nearest`          | 1     | to nearest
/// `std::round_toward_infinity`     | 2     | toward positive infinity
/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
///
/// By default this is set to `-1` (`std::round_indeterminate`), which uses truncation (round toward zero, but with overflows
/// set to infinity) and is the fastest rounding mode possible. It can even be set to `std::numeric_limits<float>::round_style`
/// to synchronize the rounding mode with that of the underlying single-precision implementation.
#ifndef HALF_ROUND_STYLE
	#define HALF_ROUND_STYLE	-1			// = std::round_indeterminate
#endif

/// Tie-breaking behaviour for round to nearest.
/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this is
/// defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way cases (and
/// thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more IEEE-conformant
/// behaviour is needed.
#ifndef HALF_ROUND_TIES_TO_EVEN
	#define HALF_ROUND_TIES_TO_EVEN	0		// ties away from zero
#endif

/// Value signaling overflow.
/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow of an
/// operation, in particular it just evaluates to positive infinity.
/// The expansion is an expression, so it can only be used once `std::numeric_limits<half_float::half>` is usable
/// (that specialization is not part of this section of the file).
#define HUGE_VALH	std::numeric_limits<half_float::half>::infinity()

/// Fast half-precision fma function.
/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate
/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all
/// arithmetic operations, this is in fact always the case.
#define FP_FAST_FMAH	1

// Fallbacks for the floating point classification and ilogb() macros. Each one is only defined when it is not already
// defined at this point (<cmath> is included above), so existing definitions are never overridden. The fallback values
// are chosen by this header.
#ifndef FP_ILOGB0
	#define FP_ILOGB0		INT_MIN
#endif
#ifndef FP_ILOGBNAN
	#define FP_ILOGBNAN		INT_MAX
#endif
#ifndef FP_SUBNORMAL
	#define FP_SUBNORMAL	0
#endif
#ifndef FP_ZERO
	#define FP_ZERO			1
#endif
#ifndef FP_NAN
	#define FP_NAN			2
#endif
#ifndef FP_INFINITE
	#define FP_INFINITE		3
#endif
#ifndef FP_NORMAL
	#define FP_NORMAL		4
#endif
|
| 258 |
|
| 259 |
|
| 260 | /// Main namespace for half precision functionality.
|
| 261 | /// This namespace contains all the functionality provided by the library.
|
| 262 | namespace half_float
|
| 263 | {
|
| 264 | class half;
|
| 265 |
|
#if HALF_ENABLE_CPP11_USER_LITERALS
	/// Library-defined half-precision literals.
	/// Import this namespace to enable half-precision floating point literals:
	/// ~~~~{.cpp}
	/// using namespace half_float::literal;
	/// half_float::half h = 4.2_h;
	/// ~~~~
	/// The namespace is only available when user-defined literals are enabled (`HALF_ENABLE_CPP11_USER_LITERALS`).
	namespace literal
	{
		/// Half-precision literal operator (forward declaration only at this point).
		half operator""_h(long double);
	}
#endif
|
| 278 |
|
| 279 | /// \internal
|
| 280 | /// \brief Implementation details.
|
| 281 | namespace detail
|
| 282 | {
|
#if HALF_ENABLE_CPP11_TYPE_TRAITS
	/// Compile-time selection between two types, forwarded to the standard library.
	template<bool Cond,typename Then,typename Else> struct conditional : std::conditional<Cond,Then,Else> {};

	/// Boolean tag types for overload dispatching.
	using std::false_type;
	using std::true_type;
	template<bool Value> struct bool_type : std::integral_constant<bool,Value> {};

	/// Detection of builtin floating point types.
	template<typename Type> struct is_float : std::is_floating_point<Type> {};
#else
	/// Compile-time selection between two types.
	/// The general case yields the second alternative, the specialization for a `true` condition yields the first one.
	template<bool,typename Then,typename Else> struct conditional { typedef Else type; };
	template<typename Then,typename Else> struct conditional<true,Then,Else> { typedef Then type; };

	/// Boolean tag types for overload dispatching.
	template<bool> struct bool_type {};
	typedef bool_type<false> false_type;
	typedef bool_type<true> true_type;

	/// Detection of builtin floating point types.
	/// Only `float`, `double` and `long double` are recognized; cv-qualifiers are stripped before the lookup.
	template<typename> struct is_float : false_type {};
	template<> struct is_float<long double> : true_type {};
	template<> struct is_float<double> : true_type {};
	template<> struct is_float<float> : true_type {};
	template<typename Type> struct is_float<const volatile Type> : is_float<Type> {};
	template<typename Type> struct is_float<volatile Type> : is_float<Type> {};
	template<typename Type> struct is_float<const Type> : is_float<Type> {};
#endif
|
| 313 |
|
| 314 | /// Type traits for floating point bits.
|
| 315 | template<typename T> struct bits { typedef unsigned char type; };
|
| 316 | template<typename T> struct bits<const T> : bits<T> {};
|
| 317 | template<typename T> struct bits<volatile T> : bits<T> {};
|
| 318 | template<typename T> struct bits<const volatile T> : bits<T> {};
|
| 319 |
|
| 320 | #if HALF_ENABLE_CPP11_CSTDINT
|
| 321 | /// Unsigned integer of (at least) 16 bits width.
|
| 322 | typedef std::uint_least16_t uint16;
|
| 323 |
|
| 324 | /// Unsigned integer of (at least) 32 bits width.
|
| 325 | template<> struct bits<float> { typedef std::uint_least32_t type; };
|
| 326 |
|
| 327 | /// Unsigned integer of (at least) 64 bits width.
|
| 328 | template<> struct bits<double> { typedef std::uint_least64_t type; };
|
| 329 | #else
|
| 330 | /// Unsigned integer of (at least) 16 bits width.
|
| 331 | typedef unsigned short uint16;
|
| 332 |
|
| 333 | /// Unsigned integer of (at least) 32 bits width.
|
| 334 | template<> struct bits<float> : conditional<std::numeric_limits<unsigned int>::digits>=32,unsigned int,unsigned long> {};
|
| 335 |
|
| 336 | #if HALF_ENABLE_CPP11_LONG_LONG
|
| 337 | /// Unsigned integer of (at least) 64 bits width.
|
| 338 | template<> struct bits<double> : conditional<std::numeric_limits<unsigned long>::digits>=64,unsigned long,unsigned long long> {};
|
| 339 | #else
|
| 340 | /// Unsigned integer of (at least) 64 bits width.
|
| 341 | template<> struct bits<double> { typedef unsigned long type; };
|
| 342 | #endif
|
| 343 | #endif
|
| 344 |
|
/// Tag type for binary construction.
/// An empty type whose only purpose is to select an overload by its presence in the argument list.
struct binary_t {};

/// Tag for binary construction.
/// Ready-made `binary_t` instance to pass wherever the tag type is expected; declared `constexpr` if supported and
/// `const` otherwise (see `HALF_CONSTEXPR_CONST`).
HALF_CONSTEXPR_CONST binary_t binary = binary_t();
|
| 350 |
|
| 351 | /// Temporary half-precision expression.
|
| 352 | /// This class represents a half-precision expression which just stores a single-precision value internally.
|
| 353 | struct expr
|
| 354 | {
|
| 355 | /// Conversion constructor.
|
| 356 | /// \param f single-precision value to convert
|
| 357 | explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {}
|
| 358 |
|
| 359 | /// Conversion to single-precision.
|
| 360 | /// \return single precision value representing expression value
|
| 361 | HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; }
|
| 362 |
|
| 363 | private:
|
| 364 | /// Internal expression value stored in single-precision.
|
| 365 | float value_;
|
| 366 | };
|
| 367 |
|
/// SFINAE helper for generic half-precision functions.
/// This class template has to be specialized for each valid combination of argument types to provide a corresponding
/// `type` member equivalent to \a T. The primary template has no `type` member, so any other combination of argument
/// types makes a declaration that uses `enable<...>::type` drop out of overload resolution.
///
/// Specializations exist for one, two and three arguments, each of which is either `half` or `expr`; argument slots that
/// are not used stay at their `void` default.
/// \tparam T type to return
template<typename T,typename,typename=void,typename=void> struct enable {};
// single argument
template<typename T> struct enable<T,half,void,void> { typedef T type; };
template<typename T> struct enable<T,expr,void,void> { typedef T type; };
// two arguments
template<typename T> struct enable<T,half,half,void> { typedef T type; };
template<typename T> struct enable<T,half,expr,void> { typedef T type; };
template<typename T> struct enable<T,expr,half,void> { typedef T type; };
template<typename T> struct enable<T,expr,expr,void> { typedef T type; };
// three arguments
template<typename T> struct enable<T,half,half,half> { typedef T type; };
template<typename T> struct enable<T,half,half,expr> { typedef T type; };
template<typename T> struct enable<T,half,expr,half> { typedef T type; };
template<typename T> struct enable<T,half,expr,expr> { typedef T type; };
template<typename T> struct enable<T,expr,half,half> { typedef T type; };
template<typename T> struct enable<T,expr,half,expr> { typedef T type; };
template<typename T> struct enable<T,expr,expr,half> { typedef T type; };
template<typename T> struct enable<T,expr,expr,expr> { typedef T type; };
|
| 387 |
|
/// Return type for specialized generic 2-argument half-precision functions.
/// This class template has to be specialized for each valid combination of argument types to provide a corresponding
/// `type` member denoting the appropriate return type. The general case derives from `enable<expr,T,U>` and thus yields
/// `expr` for every valid mix of `half` and `expr` arguments; only the combination of two `half` arguments is
/// specialized to yield `half`.
/// \tparam T first argument type
/// \tparam U second argument type
template<typename T,typename U> struct result : enable<expr,T,U> {};
template<> struct result<half,half> { typedef half type; };
|
| 395 |
|
| 396 | /// \name Classification helpers
|
| 397 | /// \{
|
| 398 |
|
/// Test a builtin floating point value for infinity of either sign.
/// Uses `std::isinf` when the C++11 math functions are enabled, the `_finite`/`_isnan` runtime functions on Visual C++
/// and a comparison against `std::numeric_limits<T>::infinity()` otherwise.
/// \tparam T argument type (builtin floating point type)
/// \param arg value to query
/// \retval true if infinity
/// \retval false else
template<typename T> bool builtin_isinf(T arg)
{
#if HALF_ENABLE_CPP11_CMATH
	const bool infinite = std::isinf(arg);
	return infinite;
#elif defined(_MSC_VER)
	// neither finite nor NaN leaves only the infinities
	const double wide = static_cast<double>(arg);
	return !(::_finite(wide) || ::_isnan(wide));
#else
	const T inf = std::numeric_limits<T>::infinity();
	return (arg == inf) || (arg == -inf);
#endif
}
|
| 414 |
|
/// Test a builtin floating point value for NaN.
/// Uses `std::isnan` when the C++11 math functions are enabled, `_isnan` on Visual C++ and otherwise relies on NaN being
/// the only value that does not compare equal to itself.
/// \tparam T argument type (builtin floating point type)
/// \param arg value to query
/// \retval true if not a number
/// \retval false else
template<typename T> bool builtin_isnan(T arg)
{
#if HALF_ENABLE_CPP11_CMATH
	const bool not_a_number = std::isnan(arg);
	return not_a_number;
#elif defined(_MSC_VER)
	const int flag = ::_isnan(static_cast<double>(arg));
	return flag != 0;
#else
	return !(arg == arg);
#endif
}
|
| 430 |
|
/// Test whether the sign of a builtin floating point value is set.
/// Uses `std::signbit` when the C++11 math functions are enabled. The fallback treats every value below zero as
/// signed and tells a negative zero from a positive one by the sign of its reciprocal.
/// \tparam T argument type (builtin floating point type)
/// \param arg value to query
/// \retval true if signbit set
/// \retval false else
template<typename T> bool builtin_signbit(T arg)
{
#if HALF_ENABLE_CPP11_CMATH
	const bool negative = std::signbit(arg);
	return negative;
#else
	const T zero = T();
	if(arg < zero)
		return true;
	if(arg == zero)
		return T(1)/arg < zero;		// 1/-0 is negative, 1/+0 is positive
	return false;
#endif
}
|
| 444 |
|
| 445 | /// \}
|
| 446 | /// \name Conversion
|
| 447 | /// \{
|
| 448 |
|
/// Convert IEEE single-precision to half-precision.
/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
///
/// The conversion is table driven. The upper 9 bits of the single-precision bit pattern (sign and exponent, `bits>>23`)
/// index two tables of 512 entries each: `base_table` provides the sign and exponent part of the result and
/// `shift_table` the right shift that aligns the 23 mantissa bits with the half-precision mantissa. Adding both parts
/// gives the truncated result, which is afterwards corrected according to the rounding mode \a R.
///
/// A branch-based variant following the same steps as the double-precision overload used to be kept inside this
/// function as commented-out code.
/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
/// \param value single-precision value
/// \return binary representation of half-precision value
template<std::float_round_style R> uint16 float2half_impl(float value, true_type)
{
	typedef bits<float>::type uint32;
	// The local variable `bits` hides the class template of the same name from here on. It is filled with memcpy,
	// because a reinterpret_cast of the float's address would violate strict aliasing.
	// NOTE(review): only sizeof(float) bytes are copied into an otherwise uninitialized uint32 - this presumably
	// assumes both types have the same size; confirm for platforms where the chosen integer type is wider.
	uint32 bits;// = *reinterpret_cast<uint32*>(&value);		//violating strict aliasing!
	std::memcpy(&bits, &value, sizeof(float));
	// Sign and exponent part of the result, indexed by sign and exponent of the input (first half: sign bit clear,
	// second half: sign bit set): zero for exponents below 103, a single mantissa bit for the subnormal results of
	// exponents 103 to 112, the rebiased exponent for 113 to 142 and the infinity pattern from exponent 143 on.
	static const uint16 base_table[512] = {
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
		0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
		0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
		0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
		0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
		0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
		0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00 };
	// Right shift applied to the 23 mantissa bits, indexed like base_table: 24 discards the mantissa completely
	// (results of zero and overflows), 23 down to 14 belong to the subnormal results, 13 keeps the upper 10 mantissa
	// bits (normal results and the all-ones exponent of infinity/NaN).
	static const unsigned char shift_table[512] = {
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
		13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
		13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 };
	// truncated result: sign/exponent part plus the shifted mantissa
	uint16 hbits = base_table[bits>>23] + static_cast<uint16>((bits&0x7FFFFF)>>shift_table[bits>>23]);
	// To nearest: add the highest discarded mantissa bit (forced to 1 for exponent 102, whose values lie just below the
	// smallest subnormal), unless the exponent field of the result is already all ones. With ties to even, the
	// increment additionally requires a nonzero bit below the discarded top bit or an odd truncated result.
	if(R == std::round_to_nearest)
		hbits += (((bits&0x7FFFFF)>>(shift_table[bits>>23]-1))|(((bits>>23)&0xFF)==102)) & ((hbits&0x7C00)!=0x7C00)
		#if HALF_ROUND_TIES_TO_EVEN
			& (((((static_cast<uint32>(1)<<(shift_table[bits>>23]-1))-1)&bits)!=0)|hbits)
		#endif
		;
	// Toward zero: an infinity produced by overflow (table shift 24, lowest bit clear) is decremented to the largest
	// finite value; a genuine infinity (table shift 13, lowest bit set) is left untouched.
	else if(R == std::round_toward_zero)
		hbits -= ((hbits&0x7FFF)==0x7C00) & ~shift_table[bits>>23];
	// Toward positive infinity: a result below 0x7C00 is incremented if mantissa bits were discarded or if the input
	// had a nonzero exponent of at most 102; a negative infinity that did not come from index 511 (sign set, exponent
	// all ones) is decremented to the largest finite negative value.
	else if(R == std::round_toward_infinity)
		hbits += ((((bits&0x7FFFFF&((static_cast<uint32>(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=102)&
			((bits>>23)!=0)))&(hbits<0x7C00)) - ((hbits==0xFC00)&((bits>>23)!=511));
	// Toward negative infinity: mirror image of the previous case for results with the sign bit set (the index bounds
	// 358 and 256 are 102 and 0 with the sign bit added); a positive infinity that did not come from index 255 is
	// decremented to the largest finite value.
	else if(R == std::round_toward_neg_infinity)
		hbits += ((((bits&0x7FFFFF&((static_cast<uint32>(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=358)&
			((bits>>23)!=256)))&(hbits<0xFC00)&(hbits>>15)) - ((hbits==0x7C00)&((bits>>23)!=255));
	return hbits;
}
|
| 569 |
|
| 570 | /// Convert IEEE double-precision to half-precision.
|
| 571 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 572 | /// \param value double-precision value
|
| 573 | /// \return binary representation of half-precision value
|
| 574 | template<std::float_round_style R> uint16 float2half_impl(double value, true_type)
|
| 575 | {
|
| 576 | typedef bits<float>::type uint32;
|
| 577 | typedef bits<double>::type uint64;
|
| 578 | uint64 bits;// = *reinterpret_cast<uint64*>(&value); //violating strict aliasing!
|
| 579 | std::memcpy(&bits, &value, sizeof(double));
|
| 580 | uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF;
|
| 581 | uint16 hbits = (hi>>16) & 0x8000;
|
| 582 | hi &= 0x7FFFFFFF;
|
| 583 | int exp = hi >> 20;
|
| 584 | if(exp == 2047)
|
| 585 | return hbits | 0x7C00 | (0x3FF&-static_cast<unsigned>((bits&0xFFFFFFFFFFFFF)!=0));
|
| 586 | if(exp > 1038)
|
| 587 | {
|
| 588 | if(R == std::round_toward_infinity)
|
| 589 | return hbits | 0x7C00 - (hbits>>15);
|
| 590 | if(R == std::round_toward_neg_infinity)
|
| 591 | return hbits | 0x7BFF + (hbits>>15);
|
| 592 | return hbits | 0x7BFF + (R!=std::round_toward_zero);
|
| 593 | }
|
| 594 | int g, s = lo != 0;
|
| 595 | if(exp > 1008)
|
| 596 | {
|
| 597 | g = (hi>>9) & 1;
|
| 598 | s |= (hi&0x1FF) != 0;
|
| 599 | hbits |= ((exp-1008)<<10) | ((hi>>10)&0x3FF);
|
| 600 | }
|
| 601 | else if(exp > 997)
|
| 602 | {
|
| 603 | int i = 1018 - exp;
|
| 604 | hi = (hi&0xFFFFF) | 0x100000;
|
| 605 | g = (hi>>i) & 1;
|
| 606 | s |= (hi&((1L<<i)-1)) != 0;
|
| 607 | hbits |= hi >> (i+1);
|
| 608 | }
|
| 609 | else
|
| 610 | {
|
| 611 | g = 0;
|
| 612 | s |= hi != 0;
|
| 613 | }
|
| 614 | if(R == std::round_to_nearest)
|
| 615 | #if HALF_ROUND_TIES_TO_EVEN
|
| 616 | hbits += g & (s|hbits);
|
| 617 | #else
|
| 618 | hbits += g;
|
| 619 | #endif
|
| 620 | else if(R == std::round_toward_infinity)
|
| 621 | hbits += ~(hbits>>15) & (s|g);
|
| 622 | else if(R == std::round_toward_neg_infinity)
|
| 623 | hbits += (hbits>>15) & (g|s);
|
| 624 | return hbits;
|
| 625 | }
|
| 626 |
|
| 627 | /// Convert non-IEEE floating point to half-precision.
|
| 628 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 629 | /// \tparam T source type (builtin floating point type)
|
| 630 | /// \param value floating point value
|
| 631 | /// \return binary representation of half-precision value
|
| 632 | template<std::float_round_style R,typename T> uint16 float2half_impl(T value, ...)
|
| 633 | {
|
| 634 | uint16 hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
|
| 635 | if(value == T())
|
| 636 | return hbits;
|
| 637 | if(builtin_isnan(value))
|
| 638 | return hbits | 0x7FFF;
|
| 639 | if(builtin_isinf(value))
|
| 640 | return hbits | 0x7C00;
|
| 641 | int exp;
|
| 642 | std::frexp(value, &exp);
|
| 643 | if(exp > 16)
|
| 644 | {
|
| 645 | if(R == std::round_toward_infinity)
|
| 646 | return hbits | (0x7C00-(hbits>>15));
|
| 647 | else if(R == std::round_toward_neg_infinity)
|
| 648 | return hbits | (0x7BFF+(hbits>>15));
|
| 649 | return hbits | (0x7BFF+(R!=std::round_toward_zero));
|
| 650 | }
|
| 651 | if(exp < -13)
|
| 652 | value = std::ldexp(value, 24);
|
| 653 | else
|
| 654 | {
|
| 655 | value = std::ldexp(value, 11-exp);
|
| 656 | hbits |= ((exp+13)<<10);
|
| 657 | }
|
| 658 | T ival, frac = std::modf(value, &ival);
|
| 659 | hbits += static_cast<uint16>(std::abs(static_cast<int>(ival)));
|
| 660 | if(R == std::round_to_nearest)
|
| 661 | {
|
| 662 | frac = std::abs(frac);
|
| 663 | #if HALF_ROUND_TIES_TO_EVEN
|
| 664 | hbits += (frac>T(0.5)) | ((frac==T(0.5))&hbits);
|
| 665 | #else
|
| 666 | hbits += frac >= T(0.5);
|
| 667 | #endif
|
| 668 | }
|
| 669 | else if(R == std::round_toward_infinity)
|
| 670 | hbits += frac > T();
|
| 671 | else if(R == std::round_toward_neg_infinity)
|
| 672 | hbits += frac < T();
|
| 673 | return hbits;
|
| 674 | }
|
| 675 |
|
| 676 | /// Convert floating point to half-precision.
|
| 677 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 678 | /// \tparam T source type (builtin floating point type)
|
| 679 | /// \param value floating point value
|
| 680 | /// \return binary representation of half-precision value
|
| 681 | template<std::float_round_style R,typename T> uint16 float2half(T value)
|
| 682 | {
|
| 683 | return float2half_impl<R>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>());
|
| 684 | }
|
| 685 |
|
| 686 | /// Convert integer to half-precision floating point.
|
| 687 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 688 | /// \tparam S `true` if value negative, `false` else
|
| 689 | /// \tparam T type to convert (builtin integer type)
|
| 690 | /// \param value non-negative integral value
|
| 691 | /// \return binary representation of half-precision value
|
| 692 | template<std::float_round_style R,bool S,typename T> uint16 int2half_impl(T value)
|
| 693 | {
|
| 694 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 695 | static_assert(std::is_integral<T>::value, "int to half conversion only supports builtin integer types");
|
| 696 | #endif
|
| 697 | if(S)
|
| 698 | value = -value;
|
| 699 | uint16 bits = S << 15;
|
| 700 | if(value > 0xFFFF)
|
| 701 | {
|
| 702 | if(R == std::round_toward_infinity)
|
| 703 | bits |= 0x7C00 - S;
|
| 704 | else if(R == std::round_toward_neg_infinity)
|
| 705 | bits |= 0x7BFF + S;
|
| 706 | else
|
| 707 | bits |= 0x7BFF + (R!=std::round_toward_zero);
|
| 708 | }
|
| 709 | else if(value)
|
| 710 | {
|
| 711 | unsigned int m = value, exp = 24;
|
| 712 | for(; m<0x400; m<<=1,--exp) ;
|
| 713 | for(; m>0x7FF; m>>=1,++exp) ;
|
| 714 | bits |= (exp<<10) + m;
|
| 715 | if(exp > 24)
|
| 716 | {
|
| 717 | if(R == std::round_to_nearest)
|
| 718 | bits += (value>>(exp-25)) & 1
|
| 719 | #if HALF_ROUND_TIES_TO_EVEN
|
| 720 | & (((((1<<(exp-25))-1)&value)!=0)|bits)
|
| 721 | #endif
|
| 722 | ;
|
| 723 | else if(R == std::round_toward_infinity)
|
| 724 | bits += ((value&((1<<(exp-24))-1))!=0) & !S;
|
| 725 | else if(R == std::round_toward_neg_infinity)
|
| 726 | bits += ((value&((1<<(exp-24))-1))!=0) & S;
|
| 727 | }
|
| 728 | }
|
| 729 | return bits;
|
| 730 | }
|
| 731 |
|
| 732 | /// Convert integer to half-precision floating point.
|
| 733 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 734 | /// \tparam T type to convert (builtin integer type)
|
| 735 | /// \param value integral value
|
| 736 | /// \return binary representation of half-precision value
|
| 737 | template<std::float_round_style R,typename T> uint16 int2half(T value)
|
| 738 | {
|
| 739 | return (value<0) ? int2half_impl<R,true>(value) : int2half_impl<R,false>(value);
|
| 740 | }
|
| 741 |
|
| 742 | /// Convert half-precision to IEEE single-precision.
|
| 743 | /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
|
| 744 | /// \param value binary representation of half-precision value
|
| 745 | /// \return single-precision value
|
| 746 | inline float half2float_impl(uint16 value, float, true_type)
|
| 747 | {
|
| 748 | typedef bits<float>::type uint32;
|
| 749 | /* uint32 bits = static_cast<uint32>(value&0x8000) << 16;
|
| 750 | int abs = value & 0x7FFF;
|
| 751 | if(abs)
|
| 752 | {
|
| 753 | bits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
|
| 754 | for(; abs<0x400; abs<<=1,bits-=0x800000) ;
|
| 755 | bits += static_cast<uint32>(abs) << 13;
|
| 756 | }
|
| 757 | */ static const uint32 mantissa_table[2048] = {
|
| 758 | 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
|
| 759 | 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
|
| 760 | 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
|
| 761 | 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000,
|
| 762 | 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000,
|
| 763 | 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
|
| 764 | 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000,
|
| 765 | 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000,
|
| 766 | 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
|
| 767 | 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000,
|
| 768 | 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000,
|
| 769 | 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
|
| 770 | 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000,
|
| 771 | 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000,
|
| 772 | 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
|
| 773 | 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000,
|
| 774 | 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
|
| 775 | 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
|
| 776 | 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
|
| 777 | 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000,
|
| 778 | 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
|
| 779 | 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000,
|
| 780 | 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000,
|
| 781 | 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
|
| 782 | 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000,
|
| 783 | 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000,
|
| 784 | 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
|
| 785 | 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000,
|
| 786 | 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000,
|
| 787 | 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
|
| 788 | 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000,
|
| 789 | 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000,
|
| 790 | 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
|
| 791 | 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000,
|
| 792 | 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000,
|
| 793 | 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
|
| 794 | 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000,
|
| 795 | 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000,
|
| 796 | 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
|
| 797 | 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000,
|
| 798 | 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000,
|
| 799 | 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
|
| 800 | 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000,
|
| 801 | 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000,
|
| 802 | 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
|
| 803 | 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000,
|
| 804 | 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000,
|
| 805 | 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
|
| 806 | 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000,
|
| 807 | 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000,
|
| 808 | 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
|
| 809 | 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000,
|
| 810 | 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000,
|
| 811 | 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
|
| 812 | 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000,
|
| 813 | 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000,
|
| 814 | 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
|
| 815 | 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000,
|
| 816 | 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000,
|
| 817 | 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
|
| 818 | 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000,
|
| 819 | 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000,
|
| 820 | 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
|
| 821 | 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000,
|
| 822 | 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000,
|
| 823 | 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
|
| 824 | 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000,
|
| 825 | 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000,
|
| 826 | 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
|
| 827 | 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000,
|
| 828 | 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000,
|
| 829 | 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
|
| 830 | 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000,
|
| 831 | 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000,
|
| 832 | 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
|
| 833 | 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000,
|
| 834 | 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000,
|
| 835 | 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
|
| 836 | 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000,
|
| 837 | 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000,
|
| 838 | 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
|
| 839 | 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000,
|
| 840 | 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000,
|
| 841 | 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
|
| 842 | 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000,
|
| 843 | 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000,
|
| 844 | 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
|
| 845 | 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000,
|
| 846 | 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000,
|
| 847 | 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
|
| 848 | 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000,
|
| 849 | 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000,
|
| 850 | 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
|
| 851 | 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000,
|
| 852 | 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000,
|
| 853 | 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
|
| 854 | 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000,
|
| 855 | 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000,
|
| 856 | 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
|
| 857 | 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000,
|
| 858 | 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000,
|
| 859 | 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
|
| 860 | 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000,
|
| 861 | 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000,
|
| 862 | 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
|
| 863 | 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000,
|
| 864 | 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000,
|
| 865 | 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
|
| 866 | 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000,
|
| 867 | 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000,
|
| 868 | 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
|
| 869 | 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000,
|
| 870 | 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000,
|
| 871 | 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
|
| 872 | 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000,
|
| 873 | 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000,
|
| 874 | 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
|
| 875 | 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000,
|
| 876 | 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000,
|
| 877 | 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
|
| 878 | 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000,
|
| 879 | 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000,
|
| 880 | 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
|
| 881 | 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000,
|
| 882 | 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000,
|
| 883 | 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
|
| 884 | 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000,
|
| 885 | 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
|
| 886 | static const uint32 exponent_table[64] = {
|
| 887 | 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
|
| 888 | 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000,
|
| 889 | 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
|
| 890 | 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
|
| 891 | static const unsigned short offset_table[64] = {
|
| 892 | 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
|
| 893 | 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
|
| 894 | uint32 bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
|
| 895 | // return *reinterpret_cast<float*>(&bits); //violating strict aliasing!
|
| 896 | float out;
|
| 897 | std::memcpy(&out, &bits, sizeof(float));
|
| 898 | return out;
|
| 899 | }
|
| 900 |
|
| 901 | /// Convert half-precision to IEEE double-precision.
|
| 902 | /// \param value binary representation of half-precision value
|
| 903 | /// \return double-precision value
|
| 904 | inline double half2float_impl(uint16 value, double, true_type)
|
| 905 | {
|
| 906 | typedef bits<float>::type uint32;
|
| 907 | typedef bits<double>::type uint64;
|
| 908 | uint32 hi = static_cast<uint32>(value&0x8000) << 16;
|
| 909 | int abs = value & 0x7FFF;
|
| 910 | if(abs)
|
| 911 | {
|
| 912 | hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00);
|
| 913 | for(; abs<0x400; abs<<=1,hi-=0x100000) ;
|
| 914 | hi += static_cast<uint32>(abs) << 10;
|
| 915 | }
|
| 916 | uint64 bits = static_cast<uint64>(hi) << 32;
|
| 917 | // return *reinterpret_cast<double*>(&bits); //violating strict aliasing!
|
| 918 | double out;
|
| 919 | std::memcpy(&out, &bits, sizeof(double));
|
| 920 | return out;
|
| 921 | }
|
| 922 |
|
| 923 | /// Convert half-precision to non-IEEE floating point.
|
| 924 | /// \tparam T type to convert to (builtin integer type)
|
| 925 | /// \param value binary representation of half-precision value
|
| 926 | /// \return floating point value
|
| 927 | template<typename T> T half2float_impl(uint16 value, T, ...)
|
| 928 | {
|
| 929 | T out;
|
| 930 | int abs = value & 0x7FFF;
|
| 931 | if(abs > 0x7C00)
|
| 932 | out = std::numeric_limits<T>::has_quiet_NaN ? std::numeric_limits<T>::quiet_NaN() : T();
|
| 933 | else if(abs == 0x7C00)
|
| 934 | out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
|
| 935 | else if(abs > 0x3FF)
|
| 936 | out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), (abs>>10)-25);
|
| 937 | else
|
| 938 | out = std::ldexp(static_cast<T>(abs), -24);
|
| 939 | return (value&0x8000) ? -out : out;
|
| 940 | }
|
| 941 |
|
| 942 | /// Convert half-precision to floating point.
|
| 943 | /// \tparam T type to convert to (builtin integer type)
|
| 944 | /// \param value binary representation of half-precision value
|
| 945 | /// \return floating point value
|
| 946 | template<typename T> T half2float(uint16 value)
|
| 947 | {
|
| 948 | return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(typename bits<T>::type)==sizeof(T)>());
|
| 949 | }
|
| 950 |
|
| 951 | /// Convert half-precision floating point to integer.
|
| 952 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 953 | /// \tparam E `true` for round to even, `false` for round away from zero
|
| 954 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
| 955 | /// \param value binary representation of half-precision value
|
| 956 | /// \return integral value
|
| 957 | template<std::float_round_style R,bool E,typename T> T half2int_impl(uint16 value)
|
| 958 | {
|
| 959 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 960 | static_assert(std::is_integral<T>::value, "half to int conversion only supports builtin integer types");
|
| 961 | #endif
|
| 962 | unsigned int e = value & 0x7FFF;
|
| 963 | if(e >= 0x7C00)
|
| 964 | return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
|
| 965 | if(e < 0x3800)
|
| 966 | {
|
| 967 | if(R == std::round_toward_infinity)
|
| 968 | return T(~(value>>15)&(e!=0));
|
| 969 | else if(R == std::round_toward_neg_infinity)
|
| 970 | return -T(value>0x8000);
|
| 971 | return T();
|
| 972 | }
|
| 973 | unsigned int m = (value&0x3FF) | 0x400;
|
| 974 | e >>= 10;
|
| 975 | if(e < 25)
|
| 976 | {
|
| 977 | if(R == std::round_to_nearest)
|
| 978 | m += (1<<(24-e)) - (~(m>>(25-e))&E);
|
| 979 | else if(R == std::round_toward_infinity)
|
| 980 | m += ((value>>15)-1) & ((1<<(25-e))-1U);
|
| 981 | else if(R == std::round_toward_neg_infinity)
|
| 982 | m += -(value>>15) & ((1<<(25-e))-1U);
|
| 983 | m >>= 25 - e;
|
| 984 | }
|
| 985 | else
|
| 986 | m <<= e - 25;
|
| 987 | return (value&0x8000) ? -static_cast<T>(m) : static_cast<T>(m);
|
| 988 | }
|
| 989 |
|
| 990 | /// Convert half-precision floating point to integer.
|
| 991 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 992 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
| 993 | /// \param value binary representation of half-precision value
|
| 994 | /// \return integral value
|
| 995 | template<std::float_round_style R,typename T> T half2int(uint16 value) { return half2int_impl<R,HALF_ROUND_TIES_TO_EVEN,T>(value); }
|
| 996 |
|
| 997 | /// Convert half-precision floating point to integer using round-to-nearest-away-from-zero.
|
| 998 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
| 999 | /// \param value binary representation of half-precision value
|
| 1000 | /// \return integral value
|
| 1001 | template<typename T> T half2int_up(uint16 value) { return half2int_impl<std::round_to_nearest,0,T>(value); }
|
| 1002 |
|
| 1003 | /// Round half-precision number to nearest integer value.
|
| 1004 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 1005 | /// \tparam E `true` for round to even, `false` for round away from zero
|
| 1006 | /// \param value binary representation of half-precision value
|
| 1007 | /// \return half-precision bits for nearest integral value
|
| 1008 | template<std::float_round_style R,bool E> uint16 round_half_impl(uint16 value)
|
| 1009 | {
|
| 1010 | unsigned int e = value & 0x7FFF;
|
| 1011 | uint16 result = value;
|
| 1012 | if(e < 0x3C00)
|
| 1013 | {
|
| 1014 | result &= 0x8000;
|
| 1015 | if(R == std::round_to_nearest)
|
| 1016 | result |= 0x3C00U & -(e>=(0x3800+E));
|
| 1017 | else if(R == std::round_toward_infinity)
|
| 1018 | result |= 0x3C00U & -(~(value>>15)&(e!=0));
|
| 1019 | else if(R == std::round_toward_neg_infinity)
|
| 1020 | result |= 0x3C00U & -(value>0x8000);
|
| 1021 | }
|
| 1022 | else if(e < 0x6400)
|
| 1023 | {
|
| 1024 | e = 25 - (e>>10);
|
| 1025 | unsigned int mask = (1<<e) - 1;
|
| 1026 | if(R == std::round_to_nearest)
|
| 1027 | result += (1<<(e-1)) - (~(result>>e)&E);
|
| 1028 | else if(R == std::round_toward_infinity)
|
| 1029 | result += mask & ((value>>15)-1);
|
| 1030 | else if(R == std::round_toward_neg_infinity)
|
| 1031 | result += mask & -(value>>15);
|
| 1032 | result &= ~mask;
|
| 1033 | }
|
| 1034 | return result;
|
| 1035 | }
|
| 1036 |
|
| 1037 | /// Round half-precision number to nearest integer value.
|
| 1038 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
| 1039 | /// \param value binary representation of half-precision value
|
| 1040 | /// \return half-precision bits for nearest integral value
|
| 1041 | template<std::float_round_style R> uint16 round_half(uint16 value) { return round_half_impl<R,HALF_ROUND_TIES_TO_EVEN>(value); }
|
| 1042 |
|
| 1043 | /// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero.
|
| 1044 | /// \param value binary representation of half-precision value
|
| 1045 | /// \return half-precision bits for nearest integral value
|
| 1046 | inline uint16 round_half_up(uint16 value) { return round_half_impl<std::round_to_nearest,0>(value); }
|
| 1047 | /// \}
|
| 1048 |
|
| 1049 | struct functions;
|
| 1050 | template<typename> struct unary_specialized;
|
| 1051 | template<typename,typename> struct binary_specialized;
|
| 1052 | template<typename,typename,std::float_round_style> struct half_caster;
|
| 1053 | }
|
| 1054 |
|
| 1055 | /// Half-precision floating point type.
|
| 1056 | /// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and
|
| 1057 | /// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and
|
| 1058 | /// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations
|
| 1059 | /// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to
|
| 1060 | /// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic
|
| 1061 | /// expressions are kept in single-precision as long as possible (while of course still maintaining a strong half-precision type).
|
| 1062 | ///
|
| 1063 | /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
|
| 1064 | /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
|
| 1065 | /// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
|
| 1066 | /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of
|
| 1067 | /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
|
| 1068 | /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
|
| 1069 | /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
|
| 1070 | /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on
|
| 1071 | /// nearly any reasonable platform.
|
| 1072 | ///
|
| 1073 | /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
|
| 1074 | /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
|
| 1075 | class half
|
| 1076 | {
|
| 1077 | friend struct detail::functions;
|
| 1078 | friend struct detail::unary_specialized<half>;
|
| 1079 | friend struct detail::binary_specialized<half,half>;
|
| 1080 | template<typename,typename,std::float_round_style> friend struct detail::half_caster;
|
| 1081 | friend class std::numeric_limits<half>;
|
| 1082 | #if HALF_ENABLE_CPP11_HASH
|
| 1083 | friend struct std::hash<half>;
|
| 1084 | #endif
|
| 1085 | #if HALF_ENABLE_CPP11_USER_LITERALS
|
| 1086 | friend half literal::operator""_h(long double);
|
| 1087 | #endif
|
| 1088 |
|
| 1089 | public:
|
| 1090 | /// Default constructor.
|
| 1091 | /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
|
| 1092 | /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
|
| 1093 | HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
|
| 1094 |
|
| 1095 | /// Copy constructor.
|
| 1096 | /// \tparam T type of concrete half expression
|
| 1097 | /// \param rhs half expression to copy from
|
| 1098 | half(detail::expr rhs) : data_(detail::float2half<round_style>(static_cast<float>(rhs))) {}
|
| 1099 |
|
| 1100 | /// Conversion constructor.
|
| 1101 | /// \param rhs float to convert
|
| 1102 | explicit half(float rhs) : data_(detail::float2half<round_style>(rhs)) {}
|
| 1103 |
|
| 1104 | /// Conversion to single-precision.
|
| 1105 | /// \return single precision value representing expression value
|
| 1106 | operator float() const { return detail::half2float<float>(data_); }
|
| 1107 |
|
| 1108 | /// Assignment operator.
|
| 1109 | /// \tparam T type of concrete half expression
|
| 1110 | /// \param rhs half expression to copy from
|
| 1111 | /// \return reference to this half
|
| 1112 | half& operator=(detail::expr rhs) { return *this = static_cast<float>(rhs); }
|
| 1113 |
|
| 1114 | /// Arithmetic assignment.
|
| 1115 | /// \tparam T type of concrete half expression
|
| 1116 | /// \param rhs half expression to add
|
| 1117 | /// \return reference to this half
|
| 1118 | template<typename T> typename detail::enable<half&,T>::type operator+=(T rhs) { return *this += static_cast<float>(rhs); }
|
| 1119 |
|
| 1120 | /// Arithmetic assignment.
|
| 1121 | /// \tparam T type of concrete half expression
|
| 1122 | /// \param rhs half expression to subtract
|
| 1123 | /// \return reference to this half
|
| 1124 | template<typename T> typename detail::enable<half&,T>::type operator-=(T rhs) { return *this -= static_cast<float>(rhs); }
|
| 1125 |
|
| 1126 | /// Arithmetic assignment.
|
| 1127 | /// \tparam T type of concrete half expression
|
| 1128 | /// \param rhs half expression to multiply with
|
| 1129 | /// \return reference to this half
|
| 1130 | template<typename T> typename detail::enable<half&,T>::type operator*=(T rhs) { return *this *= static_cast<float>(rhs); }
|
| 1131 |
|
| 1132 | /// Arithmetic assignment.
|
| 1133 | /// \tparam T type of concrete half expression
|
| 1134 | /// \param rhs half expression to divide by
|
| 1135 | /// \return reference to this half
|
| 1136 | template<typename T> typename detail::enable<half&,T>::type operator/=(T rhs) { return *this /= static_cast<float>(rhs); }
|
| 1137 |
|
| 1138 | /// Assignment operator.
|
| 1139 | /// \param rhs single-precision value to copy from
|
| 1140 | /// \return reference to this half
|
| 1141 | half& operator=(float rhs) { data_ = detail::float2half<round_style>(rhs); return *this; }
|
| 1142 |
|
| 1143 | /// Arithmetic assignment.
|
| 1144 | /// \param rhs single-precision value to add
|
| 1145 | /// \return reference to this half
|
| 1146 | half& operator+=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float<float>(data_)+rhs); return *this; }
|
| 1147 |
|
| 1148 | /// Arithmetic assignment.
|
| 1149 | /// \param rhs single-precision value to subtract
|
| 1150 | /// \return reference to this half
|
| 1151 | half& operator-=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float<float>(data_)-rhs); return *this; }
|
| 1152 |
|
| 1153 | /// Arithmetic assignment.
|
| 1154 | /// \param rhs single-precision value to multiply with
|
| 1155 | /// \return reference to this half
|
| 1156 | half& operator*=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float<float>(data_)*rhs); return *this; }
|
| 1157 |
|
| 1158 | /// Arithmetic assignment.
|
| 1159 | /// \param rhs single-precision value to divide by
|
| 1160 | /// \return reference to this half
|
| 1161 | half& operator/=(float rhs) { data_ = detail::float2half<round_style>(detail::half2float<float>(data_)/rhs); return *this; }
|
| 1162 |
|
| 1163 | /// Prefix increment.
|
| 1164 | /// \return incremented half value
|
| 1165 | half& operator++() { return *this += 1.0f; }
|
| 1166 |
|
| 1167 | /// Prefix decrement.
|
| 1168 | /// \return decremented half value
|
| 1169 | half& operator--() { return *this -= 1.0f; }
|
| 1170 |
|
| 1171 | /// Postfix increment.
|
| 1172 | /// \return non-incremented half value
|
| 1173 | half operator++(int) { half out(*this); ++*this; return out; }
|
| 1174 |
|
| 1175 | /// Postfix decrement.
|
| 1176 | /// \return non-decremented half value
|
| 1177 | half operator--(int) { half out(*this); --*this; return out; }
|
| 1178 |
|
| 1179 | private:
|
| 1180 | /// Rounding mode to use
|
| 1181 | static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
|
| 1182 |
|
| 1183 | /// Constructor.
|
| 1184 | /// \param bits binary representation to set half to
|
| 1185 | HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {}
|
| 1186 |
|
| 1187 | /// Internal binary representation
|
| 1188 | detail::uint16 data_;
|
| 1189 | };
|
| 1190 |
|
| 1191 | #if HALF_ENABLE_CPP11_USER_LITERALS
|
| 1192 | namespace literal
|
| 1193 | {
|
| 1194 | /// Half literal.
|
| 1195 | /// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due
|
| 1196 | /// to rather involved conversions.
|
| 1197 | /// \param value literal value
|
| 1198 | /// \return half with given value (if representable)
|
| 1199 | inline half operator""_h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
|
| 1200 | }
|
| 1201 | #endif
|
| 1202 |
|
| 1203 | namespace detail
|
| 1204 | {
|
| 1205 | /// Wrapper implementing unspecialized half-precision functions.
|
| 1206 | struct functions
|
| 1207 | {
|
| 1208 | /// Addition implementation.
|
| 1209 | /// \param x first operand
|
| 1210 | /// \param y second operand
|
| 1211 | /// \return Half-precision sum stored in single-precision
|
| 1212 | static expr plus(float x, float y) { return expr(x+y); }
|
| 1213 |
|
| 1214 | /// Subtraction implementation.
|
| 1215 | /// \param x first operand
|
| 1216 | /// \param y second operand
|
| 1217 | /// \return Half-precision difference stored in single-precision
|
| 1218 | static expr minus(float x, float y) { return expr(x-y); }
|
| 1219 |
|
| 1220 | /// Multiplication implementation.
|
| 1221 | /// \param x first operand
|
| 1222 | /// \param y second operand
|
| 1223 | /// \return Half-precision product stored in single-precision
|
| 1224 | static expr multiplies(float x, float y) { return expr(x*y); }
|
| 1225 |
|
| 1226 | /// Division implementation.
|
| 1227 | /// \param x first operand
|
| 1228 | /// \param y second operand
|
| 1229 | /// \return Half-precision quotient stored in single-precision
|
| 1230 | static expr divides(float x, float y) { return expr(x/y); }
|
| 1231 |
|
/// Output implementation.
/// Writes the value using the stream's single-precision formatting.
/// \param out stream to write to
/// \param arg value to write
/// \return reference to stream
template<typename charT,typename traits> static std::basic_ostream<charT,traits>& write(std::basic_ostream<charT,traits> &out, float arg)
{
	out << arg;
	return out;
}
|
| 1237 |
|
| 1238 | /// Input implementation.
|
| 1239 | /// \param in stream to read from
|
| 1240 | /// \param arg half to read into
|
| 1241 | /// \return reference to stream
|
| 1242 | template<typename charT,typename traits> static std::basic_istream<charT,traits>& read(std::basic_istream<charT,traits> &in, half &arg)
|
| 1243 | {
|
| 1244 | float f;
|
| 1245 | if(in >> f)
|
| 1246 | arg = f;
|
| 1247 | return in;
|
| 1248 | }
|
| 1249 |
|
| 1250 | /// Modulo implementation.
|
| 1251 | /// \param x first operand
|
| 1252 | /// \param y second operand
|
| 1253 | /// \return Half-precision division remainder stored in single-precision
|
| 1254 | static expr fmod(float x, float y) { return expr(std::fmod(x, y)); }
|
| 1255 |
|
| 1256 | /// Remainder implementation.
|
| 1257 | /// \param x first operand
|
| 1258 | /// \param y second operand
|
| 1259 | /// \return Half-precision division remainder stored in single-precision
|
| 1260 | static expr remainder(float x, float y)
|
| 1261 | {
|
| 1262 | #if HALF_ENABLE_CPP11_CMATH
|
| 1263 | return expr(std::remainder(x, y));
|
| 1264 | #else
|
| 1265 | if(builtin_isnan(x) || builtin_isnan(y))
|
| 1266 | return expr(std::numeric_limits<float>::quiet_NaN());
|
| 1267 | float ax = std::fabs(x), ay = std::fabs(y);
|
| 1268 | if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
|
| 1269 | return expr(std::numeric_limits<float>::quiet_NaN());
|
| 1270 | if(ay >= 65536.0f)
|
| 1271 | return expr(x);
|
| 1272 | if(ax == ay)
|
| 1273 | return expr(builtin_signbit(x) ? -0.0f : 0.0f);
|
| 1274 | ax = std::fmod(ax, ay+ay);
|
| 1275 | float y2 = 0.5f * ay;
|
| 1276 | if(ax > y2)
|
| 1277 | {
|
| 1278 | ax -= ay;
|
| 1279 | if(ax >= y2)
|
| 1280 | ax -= ay;
|
| 1281 | }
|
| 1282 | return expr(builtin_signbit(x) ? -ax : ax);
|
| 1283 | #endif
|
| 1284 | }
|
| 1285 |
|
| 1286 | /// Remainder implementation.
|
| 1287 | /// \param x first operand
|
| 1288 | /// \param y second operand
|
| 1289 | /// \param quo address to store quotient bits at
|
| 1290 | /// \return Half-precision division remainder stored in single-precision
|
| 1291 | static expr remquo(float x, float y, int *quo)
|
| 1292 | {
|
| 1293 | #if HALF_ENABLE_CPP11_CMATH
|
| 1294 | return expr(std::remquo(x, y, quo));
|
| 1295 | #else
|
| 1296 | if(builtin_isnan(x) || builtin_isnan(y))
|
| 1297 | return expr(std::numeric_limits<float>::quiet_NaN());
|
| 1298 | bool sign = builtin_signbit(x), qsign = static_cast<bool>(sign^builtin_signbit(y));
|
| 1299 | float ax = std::fabs(x), ay = std::fabs(y);
|
| 1300 | if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
|
| 1301 | return expr(std::numeric_limits<float>::quiet_NaN());
|
| 1302 | if(ay >= 65536.0f)
|
| 1303 | return expr(x);
|
| 1304 | if(ax == ay)
|
| 1305 | return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f);
|
| 1306 | ax = std::fmod(ax, 8.0f*ay);
|
| 1307 | int cquo = 0;
|
| 1308 | if(ax >= 4.0f * ay)
|
| 1309 | {
|
| 1310 | ax -= 4.0f * ay;
|
| 1311 | cquo += 4;
|
| 1312 | }
|
| 1313 | if(ax >= 2.0f * ay)
|
| 1314 | {
|
| 1315 | ax -= 2.0f * ay;
|
| 1316 | cquo += 2;
|
| 1317 | }
|
| 1318 | float y2 = 0.5f * ay;
|
| 1319 | if(ax > y2)
|
| 1320 | {
|
| 1321 | ax -= ay;
|
| 1322 | ++cquo;
|
| 1323 | if(ax >= y2)
|
| 1324 | {
|
| 1325 | ax -= ay;
|
| 1326 | ++cquo;
|
| 1327 | }
|
| 1328 | }
|
| 1329 | return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax);
|
| 1330 | #endif
|
| 1331 | }
|
| 1332 |
|
| 1333 | /// Positive difference implementation.
|
| 1334 | /// \param x first operand
|
| 1335 | /// \param y second operand
|
| 1336 | /// \return Positive difference stored in single-precision
|
| 1337 | static expr fdim(float x, float y)
|
| 1338 | {
|
| 1339 | #if HALF_ENABLE_CPP11_CMATH
|
| 1340 | return expr(std::fdim(x, y));
|
| 1341 | #else
|
| 1342 | return expr((x<=y) ? 0.0f : (x-y));
|
| 1343 | #endif
|
| 1344 | }
|
| 1345 |
|
| 1346 | /// Fused multiply-add implementation.
|
| 1347 | /// \param x first operand
|
| 1348 | /// \param y second operand
|
| 1349 | /// \param z third operand
|
| 1350 | /// \return \a x * \a y + \a z stored in single-precision
|
| 1351 | static expr fma(float x, float y, float z)
|
| 1352 | {
|
| 1353 | #if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF)
|
| 1354 | return expr(std::fma(x, y, z));
|
| 1355 | #else
|
| 1356 | return expr(x*y+z);
|
| 1357 | #endif
|
| 1358 | }
|
| 1359 |
|
| 1360 | /// Get NaN.
|
| 1361 | /// \return Half-precision quiet NaN
|
| 1362 | static half nanh() { return half(binary, 0x7FFF); }
|
| 1363 |
|
| 1364 | /// Exponential implementation.
|
| 1365 | /// \param arg function argument
|
| 1366 | /// \return function value stored in single-preicision
|
| 1367 | static expr exp(float arg) { return expr(std::exp(arg)); }
|
| 1368 |
|
| 1369 | /// Exponential implementation.
|
| 1370 | /// \param arg function argument
|
| 1371 | /// \return function value stored in single-preicision
|
| 1372 | static expr expm1(float arg)
|
| 1373 | {
|
| 1374 | #if HALF_ENABLE_CPP11_CMATH
|
| 1375 | return expr(std::expm1(arg));
|
| 1376 | #else
|
| 1377 | return expr(static_cast<float>(std::exp(static_cast<double>(arg))-1.0));
|
| 1378 | #endif
|
| 1379 | }
|
| 1380 |
|
| 1381 | /// Binary exponential implementation.
|
| 1382 | /// \param arg function argument
|
| 1383 | /// \return function value stored in single-preicision
|
| 1384 | static expr exp2(float arg)
|
| 1385 | {
|
| 1386 | #if HALF_ENABLE_CPP11_CMATH
|
| 1387 | return expr(std::exp2(arg));
|
| 1388 | #else
|
| 1389 | return expr(static_cast<float>(std::exp(arg*0.69314718055994530941723212145818)));
|
| 1390 | #endif
|
| 1391 | }
|
| 1392 |
|
| 1393 | /// Logarithm implementation.
|
| 1394 | /// \param arg function argument
|
| 1395 | /// \return function value stored in single-preicision
|
| 1396 | static expr log(float arg) { return expr(std::log(arg)); }
|
| 1397 |
|
| 1398 | /// Common logarithm implementation.
|
| 1399 | /// \param arg function argument
|
| 1400 | /// \return function value stored in single-preicision
|
| 1401 | static expr log10(float arg) { return expr(std::log10(arg)); }
|
| 1402 |
|
| 1403 | /// Logarithm implementation.
|
| 1404 | /// \param arg function argument
|
| 1405 | /// \return function value stored in single-preicision
|
| 1406 | static expr log1p(float arg)
|
| 1407 | {
|
| 1408 | #if HALF_ENABLE_CPP11_CMATH
|
| 1409 | return expr(std::log1p(arg));
|
| 1410 | #else
|
| 1411 | return expr(static_cast<float>(std::log(1.0+arg)));
|
| 1412 | #endif
|
| 1413 | }
|
| 1414 |
|
| 1415 | /// Binary logarithm implementation.
|
| 1416 | /// \param arg function argument
|
| 1417 | /// \return function value stored in single-preicision
|
| 1418 | static expr log2(float arg)
|
| 1419 | {
|
| 1420 | #if HALF_ENABLE_CPP11_CMATH
|
| 1421 | return expr(std::log2(arg));
|
| 1422 | #else
|
| 1423 | return expr(static_cast<float>(std::log(static_cast<double>(arg))*1.4426950408889634073599246810019));
|
| 1424 | #endif
|
| 1425 | }
|
| 1426 |
|
| 1427 | /// Square root implementation.
|
| 1428 | /// \param arg function argument
|
| 1429 | /// \return function value stored in single-preicision
|
| 1430 | static expr sqrt(float arg) { return expr(std::sqrt(arg)); }
|
| 1431 |
|
| 1432 | /// Cubic root implementation.
|
| 1433 | /// \param arg function argument
|
| 1434 | /// \return function value stored in single-preicision
|
| 1435 | static expr cbrt(float arg)
|
| 1436 | {
|
| 1437 | #if HALF_ENABLE_CPP11_CMATH
|
| 1438 | return expr(std::cbrt(arg));
|
| 1439 | #else
|
| 1440 | if(builtin_isnan(arg) || builtin_isinf(arg))
|
| 1441 | return expr(arg);
|
| 1442 | return expr(builtin_signbit(arg) ? -static_cast<float>(std::pow(-static_cast<double>(arg), 1.0/3.0)) :
|
| 1443 | static_cast<float>(std::pow(static_cast<double>(arg), 1.0/3.0)));
|
| 1444 | #endif
|
| 1445 | }
|
| 1446 |
|
| 1447 | /// Hypotenuse implementation.
|
| 1448 | /// \param x first argument
|
| 1449 | /// \param y second argument
|
| 1450 | /// \return function value stored in single-preicision
|
| 1451 | static expr hypot(float x, float y)
|
| 1452 | {
|
| 1453 | #if HALF_ENABLE_CPP11_CMATH
|
| 1454 | return expr(std::hypot(x, y));
|
| 1455 | #else
|
| 1456 | return expr((builtin_isinf(x) || builtin_isinf(y)) ? std::numeric_limits<float>::infinity() :
|
| 1457 | static_cast<float>(std::sqrt(static_cast<double>(x)*x+static_cast<double>(y)*y)));
|
| 1458 | #endif
|
| 1459 | }
|
| 1460 |
|
| 1461 | /// Power implementation.
|
| 1462 | /// \param base value to exponentiate
|
| 1463 | /// \param exp power to expontiate to
|
| 1464 | /// \return function value stored in single-preicision
|
| 1465 | static expr pow(float base, float exp) { return expr(std::pow(base, exp)); }
|
| 1466 |
|
| 1467 | /// Sine implementation.
|
| 1468 | /// \param arg function argument
|
| 1469 | /// \return function value stored in single-preicision
|
| 1470 | static expr sin(float arg) { return expr(std::sin(arg)); }
|
| 1471 |
|
| 1472 | /// Cosine implementation.
|
| 1473 | /// \param arg function argument
|
| 1474 | /// \return function value stored in single-preicision
|
| 1475 | static expr cos(float arg) { return expr(std::cos(arg)); }
|
| 1476 |
|
| 1477 | /// Tan implementation.
|
| 1478 | /// \param arg function argument
|
| 1479 | /// \return function value stored in single-preicision
|
| 1480 | static expr tan(float arg) { return expr(std::tan(arg)); }
|
| 1481 |
|
| 1482 | /// Arc sine implementation.
|
| 1483 | /// \param arg function argument
|
| 1484 | /// \return function value stored in single-preicision
|
| 1485 | static expr asin(float arg) { return expr(std::asin(arg)); }
|
| 1486 |
|
| 1487 | /// Arc cosine implementation.
|
| 1488 | /// \param arg function argument
|
| 1489 | /// \return function value stored in single-preicision
|
| 1490 | static expr acos(float arg) { return expr(std::acos(arg)); }
|
| 1491 |
|
| 1492 | /// Arc tangent implementation.
|
| 1493 | /// \param arg function argument
|
| 1494 | /// \return function value stored in single-preicision
|
| 1495 | static expr atan(float arg) { return expr(std::atan(arg)); }
|
| 1496 |
|
| 1497 | /// Arc tangent implementation.
|
| 1498 | /// \param x first argument
|
| 1499 | /// \param y second argument
|
| 1500 | /// \return function value stored in single-preicision
|
| 1501 | static expr atan2(float x, float y) { return expr(std::atan2(x, y)); }
|
| 1502 |
|
| 1503 | /// Hyperbolic sine implementation.
|
| 1504 | /// \param arg function argument
|
| 1505 | /// \return function value stored in single-preicision
|
| 1506 | static expr sinh(float arg) { return expr(std::sinh(arg)); }
|
| 1507 |
|
| 1508 | /// Hyperbolic cosine implementation.
|
| 1509 | /// \param arg function argument
|
| 1510 | /// \return function value stored in single-preicision
|
| 1511 | static expr cosh(float arg) { return expr(std::cosh(arg)); }
|
| 1512 |
|
| 1513 | /// Hyperbolic tangent implementation.
|
| 1514 | /// \param arg function argument
|
| 1515 | /// \return function value stored in single-preicision
|
| 1516 | static expr tanh(float arg) { return expr(std::tanh(arg)); }
|
| 1517 |
|
| 1518 | /// Hyperbolic area sine implementation.
|
| 1519 | /// \param arg function argument
|
| 1520 | /// \return function value stored in single-preicision
|
| 1521 | static expr asinh(float arg)
|
| 1522 | {
|
| 1523 | #if HALF_ENABLE_CPP11_CMATH
|
| 1524 | return expr(std::asinh(arg));
|
| 1525 | #else
|
| 1526 | return expr((arg==-std::numeric_limits<float>::infinity()) ? arg : static_cast<float>(std::log(arg+std::sqrt(arg*arg+1.0))));
|
| 1527 | #endif
|
| 1528 | }
|
| 1529 |
|
| 1530 | /// Hyperbolic area cosine implementation.
|
| 1531 | /// \param arg function argument
|
| 1532 | /// \return function value stored in single-preicision
|
| 1533 | static expr acosh(float arg)
|
| 1534 | {
|
| 1535 | #if HALF_ENABLE_CPP11_CMATH
|
| 1536 | return expr(std::acosh(arg));
|
| 1537 | #else
|
| 1538 | return expr((arg<-1.0f) ? std::numeric_limits<float>::quiet_NaN() : static_cast<float>(std::log(arg+std::sqrt(arg*arg-1.0))));
|
| 1539 | #endif
|
| 1540 | }
|
| 1541 |
|
| 1542 | /// Hyperbolic area tangent implementation.
|
| 1543 | /// \param arg function argument
|
| 1544 | /// \return function value stored in single-preicision
|
| 1545 | static expr atanh(float arg)
|
| 1546 | {
|
| 1547 | #if HALF_ENABLE_CPP11_CMATH
|
| 1548 | return expr(std::atanh(arg));
|
| 1549 | #else
|
| 1550 | return expr(static_cast<float>(0.5*std::log((1.0+arg)/(1.0-arg))));
|
| 1551 | #endif
|
| 1552 | }
|
| 1553 |
|
| 1554 | /// Error function implementation.
|
| 1555 | /// \param arg function argument
|
| 1556 | /// \return function value stored in single-preicision
|
| 1557 | static expr erf(float arg)
|
| 1558 | {
|
| 1559 | #if HALF_ENABLE_CPP11_CMATH
|
| 1560 | return expr(std::erf(arg));
|
| 1561 | #else
|
| 1562 | return expr(static_cast<float>(erf(static_cast<double>(arg))));
|
| 1563 | #endif
|
| 1564 | }
|
| 1565 |
|
| 1566 | /// Complementary implementation.
|
| 1567 | /// \param arg function argument
|
| 1568 | /// \return function value stored in single-preicision
|
| 1569 | static expr erfc(float arg)
|
| 1570 | {
|
| 1571 | #if HALF_ENABLE_CPP11_CMATH
|
| 1572 | return expr(std::erfc(arg));
|
| 1573 | #else
|
| 1574 | return expr(static_cast<float>(1.0-erf(static_cast<double>(arg))));
|
| 1575 | #endif
|
| 1576 | }
|
| 1577 |
|
| 1578 | /// Gamma logarithm implementation.
|
| 1579 | /// \param arg function argument
|
| 1580 | /// \return function value stored in single-preicision
|
| 1581 | static expr lgamma(float arg)
|
| 1582 | {
|
| 1583 | #if HALF_ENABLE_CPP11_CMATH
|
| 1584 | return expr(std::lgamma(arg));
|
| 1585 | #else
|
| 1586 | if(builtin_isinf(arg))
|
| 1587 | return expr(std::numeric_limits<float>::infinity());
|
| 1588 | if(arg < 0.0f)
|
| 1589 | {
|
| 1590 | float i, f = std::modf(-arg, &i);
|
| 1591 | if(f == 0.0f)
|
| 1592 | return expr(std::numeric_limits<float>::infinity());
|
| 1593 | return expr(static_cast<float>(1.1447298858494001741434273513531-
|
| 1594 | std::log(std::abs(std::sin(3.1415926535897932384626433832795*f)))-lgamma(1.0-arg)));
|
| 1595 | }
|
| 1596 | return expr(static_cast<float>(lgamma(static_cast<double>(arg))));
|
| 1597 | #endif
|
| 1598 | }
|
| 1599 |
|
| 1600 | /// Gamma implementation.
|
| 1601 | /// \param arg function argument
|
| 1602 | /// \return function value stored in single-preicision
|
| 1603 | static expr tgamma(float arg)
|
| 1604 | {
|
| 1605 | #if HALF_ENABLE_CPP11_CMATH
|
| 1606 | return expr(std::tgamma(arg));
|
| 1607 | #else
|
| 1608 | if(arg == 0.0f)
|
| 1609 | return builtin_signbit(arg) ? expr(-std::numeric_limits<float>::infinity()) : expr(std::numeric_limits<float>::infinity());
|
| 1610 | if(arg < 0.0f)
|
| 1611 | {
|
| 1612 | float i, f = std::modf(-arg, &i);
|
| 1613 | if(f == 0.0f)
|
| 1614 | return expr(std::numeric_limits<float>::quiet_NaN());
|
| 1615 | double value = 3.1415926535897932384626433832795 / (std::sin(3.1415926535897932384626433832795*f)*std::exp(lgamma(1.0-arg)));
|
| 1616 | return expr(static_cast<float>((std::fmod(i, 2.0f)==0.0f) ? -value : value));
|
| 1617 | }
|
| 1618 | if(builtin_isinf(arg))
|
| 1619 | return expr(arg);
|
| 1620 | return expr(static_cast<float>(std::exp(lgamma(static_cast<double>(arg)))));
|
| 1621 | #endif
|
| 1622 | }
|
| 1623 |
|
| 1624 | /// Floor implementation.
|
| 1625 | /// \param arg value to round
|
| 1626 | /// \return rounded value
|
| 1627 | static half floor(half arg) { return half(binary, round_half<std::round_toward_neg_infinity>(arg.data_)); }
|
| 1628 |
|
| 1629 | /// Ceiling implementation.
|
| 1630 | /// \param arg value to round
|
| 1631 | /// \return rounded value
|
| 1632 | static half ceil(half arg) { return half(binary, round_half<std::round_toward_infinity>(arg.data_)); }
|
| 1633 |
|
| 1634 | /// Truncation implementation.
|
| 1635 | /// \param arg value to round
|
| 1636 | /// \return rounded value
|
| 1637 | static half trunc(half arg) { return half(binary, round_half<std::round_toward_zero>(arg.data_)); }
|
| 1638 |
|
| 1639 | /// Nearest integer implementation.
|
| 1640 | /// \param arg value to round
|
| 1641 | /// \return rounded value
|
| 1642 | static half round(half arg) { return half(binary, round_half_up(arg.data_)); }
|
| 1643 |
|
| 1644 | /// Nearest integer implementation.
|
| 1645 | /// \param arg value to round
|
| 1646 | /// \return rounded value
|
| 1647 | static long lround(half arg) { return detail::half2int_up<long>(arg.data_); }
|
| 1648 |
|
| 1649 | /// Nearest integer implementation.
|
| 1650 | /// \param arg value to round
|
| 1651 | /// \return rounded value
|
| 1652 | static half rint(half arg) { return half(binary, round_half<half::round_style>(arg.data_)); }
|
| 1653 |
|
| 1654 | /// Nearest integer implementation.
|
| 1655 | /// \param arg value to round
|
| 1656 | /// \return rounded value
|
| 1657 | static long lrint(half arg) { return detail::half2int<half::round_style,long>(arg.data_); }
|
| 1658 |
|
#if HALF_ENABLE_CPP11_LONG_LONG
/// Nearest integer implementation returning `long long`.
/// Converts the raw bit pattern with `detail::half2int_up`.
/// \param arg value to round
/// \return rounded value
static long long llround(half arg)
{
	return detail::half2int_up<long long>(arg.data_);
}

/// Nearest integer implementation returning `long long`, using the half type's own rounding mode.
/// Converts the raw bit pattern via `detail::half2int` with `half::round_style`.
/// \param arg value to round
/// \return rounded value
static long long llrint(half arg)
{
	return detail::half2int<half::round_style,long long>(arg.data_);
}
#endif
|
| 1670 |
|
| 1671 | /// Decompression implementation.
|
| 1672 | /// \param arg number to decompress
|
| 1673 | /// \param exp address to store exponent at
|
| 1674 | /// \return normalized significant
|
| 1675 | static half frexp(half arg, int *exp)
|
| 1676 | {
|
| 1677 | int m = arg.data_ & 0x7FFF, e = -14;
|
| 1678 | if(m >= 0x7C00 || !m)
|
| 1679 | return *exp = 0, arg;
|
| 1680 | for(; m<0x400; m<<=1,--e) ;
|
| 1681 | return *exp = e+(m>>10), half(binary, (arg.data_&0x8000)|0x3800|(m&0x3FF));
|
| 1682 | }
|
| 1683 |
|
| 1684 | /// Decompression implementation.
|
| 1685 | /// \param arg number to decompress
|
| 1686 | /// \param iptr address to store integer part at
|
| 1687 | /// \return fractional part
|
| 1688 | static half modf(half arg, half *iptr)
|
| 1689 | {
|
| 1690 | unsigned int e = arg.data_ & 0x7FFF;
|
| 1691 | if(e >= 0x6400)
|
| 1692 | return *iptr = arg, half(binary, arg.data_&(0x8000U|-(e>0x7C00)));
|
| 1693 | if(e < 0x3C00)
|
| 1694 | return iptr->data_ = arg.data_ & 0x8000, arg;
|
| 1695 | e >>= 10;
|
| 1696 | unsigned int mask = (1<<(25-e)) - 1, m = arg.data_ & mask;
|
| 1697 | iptr->data_ = arg.data_ & ~mask;
|
| 1698 | if(!m)
|
| 1699 | return half(binary, arg.data_&0x8000);
|
| 1700 | for(; m<0x400; m<<=1,--e) ;
|
| 1701 | return half(binary, static_cast<uint16>((arg.data_&0x8000)|(e<<10)|(m&0x3FF)));
|
| 1702 | }
|
| 1703 |
|
| 1704 | /// Scaling implementation.
|
| 1705 | /// \param arg number to scale
|
| 1706 | /// \param exp power of two to scale by
|
| 1707 | /// \return scaled number
|
| 1708 | static half scalbln(half arg, long exp)
|
| 1709 | {
|
| 1710 | unsigned int m = arg.data_ & 0x7FFF;
|
| 1711 | if(m >= 0x7C00 || !m)
|
| 1712 | return arg;
|
| 1713 | for(; m<0x400; m<<=1,--exp) ;
|
| 1714 | exp += m >> 10;
|
| 1715 | uint16 value = arg.data_ & 0x8000;
|
| 1716 | if(exp > 30)
|
| 1717 | {
|
| 1718 | if(half::round_style == std::round_toward_zero)
|
| 1719 | value |= 0x7BFF;
|
| 1720 | else if(half::round_style == std::round_toward_infinity)
|
| 1721 | value |= 0x7C00 - (value>>15);
|
| 1722 | else if(half::round_style == std::round_toward_neg_infinity)
|
| 1723 | value |= 0x7BFF + (value>>15);
|
| 1724 | else
|
| 1725 | value |= 0x7C00;
|
| 1726 | }
|
| 1727 | else if(exp > 0)
|
| 1728 | value |= (exp<<10) | (m&0x3FF);
|
| 1729 | else if(exp > -11)
|
| 1730 | {
|
| 1731 | m = (m&0x3FF) | 0x400;
|
| 1732 | if(half::round_style == std::round_to_nearest)
|
| 1733 | {
|
| 1734 | m += 1 << -exp;
|
| 1735 | #if HALF_ROUND_TIES_TO_EVEN
|
| 1736 | m -= (m>>(1-exp)) & 1;
|
| 1737 | #endif
|
| 1738 | }
|
| 1739 | else if(half::round_style == std::round_toward_infinity)
|
| 1740 | m += ((value>>15)-1) & ((1<<(1-exp))-1U);
|
| 1741 | else if(half::round_style == std::round_toward_neg_infinity)
|
| 1742 | m += -(value>>15) & ((1<<(1-exp))-1U);
|
| 1743 | value |= m >> (1-exp);
|
| 1744 | }
|
| 1745 | else if(half::round_style == std::round_toward_infinity)
|
| 1746 | value -= (value>>15) - 1;
|
| 1747 | else if(half::round_style == std::round_toward_neg_infinity)
|
| 1748 | value += value >> 15;
|
| 1749 | return half(binary, value);
|
| 1750 | }
|
| 1751 |
|
| 1752 | /// Exponent implementation.
|
| 1753 | /// \param arg number to query
|
| 1754 | /// \return floating point exponent
|
| 1755 | static int ilogb(half arg)
|
| 1756 | {
|
| 1757 | int abs = arg.data_ & 0x7FFF;
|
| 1758 | if(!abs)
|
| 1759 | return FP_ILOGB0;
|
| 1760 | if(abs < 0x7C00)
|
| 1761 | {
|
| 1762 | int exp = (abs>>10) - 15;
|
| 1763 | if(abs < 0x400)
|
| 1764 | for(; abs<0x200; abs<<=1,--exp) ;
|
| 1765 | return exp;
|
| 1766 | }
|
| 1767 | if(abs > 0x7C00)
|
| 1768 | return FP_ILOGBNAN;
|
| 1769 | return INT_MAX;
|
| 1770 | }
|
| 1771 |
|
| 1772 | /// Exponent implementation.
|
| 1773 | /// \param arg number to query
|
| 1774 | /// \return floating point exponent
|
| 1775 | static half logb(half arg)
|
| 1776 | {
|
| 1777 | int abs = arg.data_ & 0x7FFF;
|
| 1778 | if(!abs)
|
| 1779 | return half(binary, 0xFC00);
|
| 1780 | if(abs < 0x7C00)
|
| 1781 | {
|
| 1782 | int exp = (abs>>10) - 15;
|
| 1783 | if(abs < 0x400)
|
| 1784 | for(; abs<0x200; abs<<=1,--exp) ;
|
| 1785 | uint16 bits = (exp<0) << 15;
|
| 1786 | if(exp)
|
| 1787 | {
|
| 1788 | unsigned int m = std::abs(exp) << 6, e = 18;
|
| 1789 | for(; m<0x400; m<<=1,--e) ;
|
| 1790 | bits |= (e<<10) + m;
|
| 1791 | }
|
| 1792 | return half(binary, bits);
|
| 1793 | }
|
| 1794 | if(abs > 0x7C00)
|
| 1795 | return arg;
|
| 1796 | return half(binary, 0x7C00);
|
| 1797 | }
|
| 1798 |
|
| 1799 | /// Enumeration implementation.
|
| 1800 | /// \param from number to increase/decrease
|
| 1801 | /// \param to direction to enumerate into
|
| 1802 | /// \return next representable number
|
| 1803 | static half nextafter(half from, half to)
|
| 1804 | {
|
| 1805 | uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
|
| 1806 | if(fabs > 0x7C00)
|
| 1807 | return from;
|
| 1808 | if(tabs > 0x7C00 || from.data_ == to.data_ || !(fabs|tabs))
|
| 1809 | return to;
|
| 1810 | if(!fabs)
|
| 1811 | return half(binary, (to.data_&0x8000)+1);
|
| 1812 | bool lt = ((fabs==from.data_) ? static_cast<int>(fabs) : -static_cast<int>(fabs)) <
|
| 1813 | ((tabs==to.data_) ? static_cast<int>(tabs) : -static_cast<int>(tabs));
|
| 1814 | return half(binary, from.data_+(((from.data_>>15)^static_cast<unsigned>(lt))<<1)-1);
|
| 1815 | }
|
| 1816 |
|
| 1817 | /// Enumeration implementation.
|
| 1818 | /// \param from number to increase/decrease
|
| 1819 | /// \param to direction to enumerate into
|
| 1820 | /// \return next representable number
|
| 1821 | static half nexttoward(half from, long double to)
|
| 1822 | {
|
| 1823 | if(isnan(from))
|
| 1824 | return from;
|
| 1825 | long double lfrom = static_cast<long double>(from);
|
| 1826 | if(builtin_isnan(to) || lfrom == to)
|
| 1827 | return half(static_cast<float>(to));
|
| 1828 | if(!(from.data_&0x7FFF))
|
| 1829 | return half(binary, (static_cast<detail::uint16>(builtin_signbit(to))<<15)+1);
|
| 1830 | return half(binary, from.data_+(((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1)-1);
|
| 1831 | }
|
| 1832 |
|
| 1833 | /// Sign implementation
|
| 1834 | /// \param x first operand
|
| 1835 | /// \param y second operand
|
| 1836 | /// \return composed value
|
| 1837 | static half copysign(half x, half y) { return half(binary, x.data_^((x.data_^y.data_)&0x8000)); }
|
| 1838 |
|
| 1839 | /// Classification implementation.
|
| 1840 | /// \param arg value to classify
|
| 1841 | /// \retval true if infinite number
|
| 1842 | /// \retval false else
|
| 1843 | static int fpclassify(half arg)
|
| 1844 | {
|
| 1845 | unsigned int abs = arg.data_ & 0x7FFF;
|
| 1846 | return abs ? ((abs>0x3FF) ? ((abs>=0x7C00) ? ((abs>0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) :FP_SUBNORMAL) : FP_ZERO;
|
| 1847 | }
|
| 1848 |
|
| 1849 | /// Classification implementation.
|
| 1850 | /// \param arg value to classify
|
| 1851 | /// \retval true if finite number
|
| 1852 | /// \retval false else
|
| 1853 | static bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
|
| 1854 |
|
| 1855 | /// Classification implementation.
|
| 1856 | /// \param arg value to classify
|
| 1857 | /// \retval true if infinite number
|
| 1858 | /// \retval false else
|
| 1859 | static bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
|
| 1860 |
|
| 1861 | /// Classification implementation.
|
| 1862 | /// \param arg value to classify
|
| 1863 | /// \retval true if not a number
|
| 1864 | /// \retval false else
|
| 1865 | static bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
|
| 1866 |
|
| 1867 | /// Classification implementation.
|
| 1868 | /// \param arg value to classify
|
| 1869 | /// \retval true if normal number
|
| 1870 | /// \retval false else
|
| 1871 | static bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
|
| 1872 |
|
| 1873 | /// Sign bit implementation.
|
| 1874 | /// \param arg value to check
|
| 1875 | /// \retval true if signed
|
| 1876 | /// \retval false if unsigned
|
| 1877 | static bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
|
| 1878 |
|
| 1879 | /// Comparison implementation.
|
| 1880 | /// \param x first operand
|
| 1881 | /// \param y second operand
|
| 1882 | /// \retval true if operands equal
|
| 1883 | /// \retval false else
|
| 1884 | static bool isequal(half x, half y) { return (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)) && !isnan(x); }
|
| 1885 |
|
| 1886 | /// Comparison implementation.
|
| 1887 | /// \param x first operand
|
| 1888 | /// \param y second operand
|
| 1889 | /// \retval true if operands not equal
|
| 1890 | /// \retval false else
|
| 1891 | static bool isnotequal(half x, half y) { return (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)) || isnan(x); }
|
| 1892 |
|
| 1893 | /// Comparison implementation.
|
| 1894 | /// \param x first operand
|
| 1895 | /// \param y second operand
|
| 1896 | /// \retval true if \a x > \a y
|
| 1897 | /// \retval false else
|
| 1898 | static bool isgreater(half x, half y)
|
| 1899 | {
|
| 1900 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 1901 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs));
|
| 1902 | }
|
| 1903 |
|
| 1904 | /// Comparison implementation.
|
| 1905 | /// \param x first operand
|
| 1906 | /// \param y second operand
|
| 1907 | /// \retval true if \a x >= \a y
|
| 1908 | /// \retval false else
|
| 1909 | static bool isgreaterequal(half x, half y)
|
| 1910 | {
|
| 1911 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 1912 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) >= ((yabs==y.data_) ? yabs : -yabs));
|
| 1913 | }
|
| 1914 |
|
| 1915 | /// Comparison implementation.
|
| 1916 | /// \param x first operand
|
| 1917 | /// \param y second operand
|
| 1918 | /// \retval true if \a x < \a y
|
| 1919 | /// \retval false else
|
| 1920 | static bool isless(half x, half y)
|
| 1921 | {
|
| 1922 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 1923 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs));
|
| 1924 | }
|
| 1925 |
|
| 1926 | /// Comparison implementation.
|
| 1927 | /// \param x first operand
|
| 1928 | /// \param y second operand
|
| 1929 | /// \retval true if \a x <= \a y
|
| 1930 | /// \retval false else
|
| 1931 | static bool islessequal(half x, half y)
|
| 1932 | {
|
| 1933 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 1934 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) <= ((yabs==y.data_) ? yabs : -yabs));
|
| 1935 | }
|
| 1936 |
|
| 1937 | /// Comparison implementation.
|
| 1938 | /// \param x first operand
|
| 1939 | /// \param y second operand
|
| 1940 | /// \retval true if either \a x > \a y nor \a x < \a y
|
| 1941 | /// \retval false else
|
| 1942 | static bool islessgreater(half x, half y)
|
| 1943 | {
|
| 1944 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 1945 | if(xabs > 0x7C00 || yabs > 0x7C00)
|
| 1946 | return false;
|
| 1947 | int a = (xabs==x.data_) ? xabs : -xabs, b = (yabs==y.data_) ? yabs : -yabs;
|
| 1948 | return a < b || a > b;
|
| 1949 | }
|
| 1950 |
|
| 1951 | /// Comparison implementation.
|
| 1952 | /// \param x first operand
|
| 1953 | /// \param y second operand
|
| 1954 | /// \retval true if operand unordered
|
| 1955 | /// \retval false else
|
| 1956 | static bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
|
| 1957 |
|
| 1958 | private:
|
| 1959 | static double erf(double arg)
|
| 1960 | {
|
| 1961 | if(builtin_isinf(arg))
|
| 1962 | return (arg<0.0) ? -1.0 : 1.0;
|
| 1963 | double x2 = arg * arg, ax2 = 0.147 * x2, value = std::sqrt(1.0-std::exp(-x2*(1.2732395447351626861510701069801+ax2)/(1.0+ax2)));
|
| 1964 | return builtin_signbit(arg) ? -value : value;
|
| 1965 | }
|
| 1966 |
|
| 1967 | static double lgamma(double arg)
|
| 1968 | {
|
| 1969 | double v = 1.0;
|
| 1970 | for(; arg<8.0; ++arg) v *= arg;
|
| 1971 | double w = 1.0 / (arg*arg);
|
| 1972 | return (((((((-0.02955065359477124183006535947712*w+0.00641025641025641025641025641026)*w+
|
| 1973 | -0.00191752691752691752691752691753)*w+8.4175084175084175084175084175084e-4)*w+
|
| 1974 | -5.952380952380952380952380952381e-4)*w+7.9365079365079365079365079365079e-4)*w+
|
| 1975 | -0.00277777777777777777777777777778)*w+0.08333333333333333333333333333333)/arg +
|
| 1976 | 0.91893853320467274178032973640562 - std::log(v) - arg + (arg-0.5) * std::log(arg);
|
| 1977 | }
|
| 1978 | };
|
| 1979 |
|
| 1980 | /// Wrapper for unary half-precision functions needing specialization for individual argument types.
|
| 1981 | /// \tparam T argument type
|
| 1982 | template<typename T> struct unary_specialized
|
| 1983 | {
|
| 1984 | /// Negation implementation.
|
| 1985 | /// \param arg value to negate
|
| 1986 | /// \return negated value
|
| 1987 | static HALF_CONSTEXPR half negate(half arg) { return half(binary, arg.data_^0x8000); }
|
| 1988 |
|
| 1989 | /// Absolute value implementation.
|
| 1990 | /// \param arg function argument
|
| 1991 | /// \return absolute value
|
| 1992 | static half fabs(half arg) { return half(binary, arg.data_&0x7FFF); }
|
| 1993 | };
|
| 1994 | template<> struct unary_specialized<expr>
|
| 1995 | {
|
| 1996 | static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); }
|
| 1997 | static expr fabs(float arg) { return expr(std::fabs(arg)); }
|
| 1998 | };
|
| 1999 |
|
| 2000 | /// Wrapper for binary half-precision functions needing specialization for individual argument types.
|
| 2001 | /// \tparam T first argument type
|
| 2002 | /// \tparam U first argument type
|
| 2003 | template<typename T,typename U> struct binary_specialized
|
| 2004 | {
|
| 2005 | /// Minimum implementation.
|
| 2006 | /// \param x first operand
|
| 2007 | /// \param y second operand
|
| 2008 | /// \return minimum value
|
| 2009 | static expr fmin(float x, float y)
|
| 2010 | {
|
| 2011 | #if HALF_ENABLE_CPP11_CMATH
|
| 2012 | return expr(std::fmin(x, y));
|
| 2013 | #else
|
| 2014 | if(builtin_isnan(x))
|
| 2015 | return expr(y);
|
| 2016 | if(builtin_isnan(y))
|
| 2017 | return expr(x);
|
| 2018 | return expr(std::min(x, y));
|
| 2019 | #endif
|
| 2020 | }
|
| 2021 |
|
| 2022 | /// Maximum implementation.
|
| 2023 | /// \param x first operand
|
| 2024 | /// \param y second operand
|
| 2025 | /// \return maximum value
|
| 2026 | static expr fmax(float x, float y)
|
| 2027 | {
|
| 2028 | #if HALF_ENABLE_CPP11_CMATH
|
| 2029 | return expr(std::fmax(x, y));
|
| 2030 | #else
|
| 2031 | if(builtin_isnan(x))
|
| 2032 | return expr(y);
|
| 2033 | if(builtin_isnan(y))
|
| 2034 | return expr(x);
|
| 2035 | return expr(std::max(x, y));
|
| 2036 | #endif
|
| 2037 | }
|
| 2038 | };
|
| 2039 | template<> struct binary_specialized<half,half>
|
| 2040 | {
|
| 2041 | static half fmin(half x, half y)
|
| 2042 | {
|
| 2043 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 2044 | if(xabs > 0x7C00)
|
| 2045 | return y;
|
| 2046 | if(yabs > 0x7C00)
|
| 2047 | return x;
|
| 2048 | return (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)) ? y : x;
|
| 2049 | }
|
| 2050 | static half fmax(half x, half y)
|
| 2051 | {
|
| 2052 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
|
| 2053 | if(xabs > 0x7C00)
|
| 2054 | return y;
|
| 2055 | if(yabs > 0x7C00)
|
| 2056 | return x;
|
| 2057 | return (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)) ? y : x;
|
| 2058 | }
|
| 2059 | };
|
| 2060 |
|
| 2061 | /// Helper class for half casts.
|
| 2062 | /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member
|
| 2063 | /// function and a corresponding `type` member denoting its return type.
|
| 2064 | /// \tparam T destination type
|
| 2065 | /// \tparam U source type
|
| 2066 | /// \tparam R rounding mode to use
|
| 2067 | template<typename T,typename U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
|
| 2068 | template<typename U,std::float_round_style R> struct half_caster<half,U,R>
|
| 2069 | {
|
| 2070 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 2071 | static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
|
| 2072 | #endif
|
| 2073 |
|
| 2074 | static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
|
| 2075 |
|
| 2076 | private:
|
| 2077 | static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
|
| 2078 | static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
|
| 2079 | };
|
| 2080 | template<typename T,std::float_round_style R> struct half_caster<T,half,R>
|
| 2081 | {
|
| 2082 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 2083 | static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
|
| 2084 | #endif
|
| 2085 |
|
| 2086 | static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
|
| 2087 |
|
| 2088 | private:
|
| 2089 | static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
|
| 2090 | static T cast_impl(half arg, false_type) { return half2int<R,T>(arg.data_); }
|
| 2091 | };
|
| 2092 | template<typename T,std::float_round_style R> struct half_caster<T,expr,R>
|
| 2093 | {
|
| 2094 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
|
| 2095 | static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
|
| 2096 | #endif
|
| 2097 |
|
| 2098 | static T cast(expr arg) { return cast_impl(arg, is_float<T>()); }
|
| 2099 |
|
| 2100 | private:
|
| 2101 | static T cast_impl(float arg, true_type) { return static_cast<T>(arg); }
|
| 2102 | static T cast_impl(half arg, false_type) { return half2int<R,T>(arg.data_); }
|
| 2103 | };
|
| 2104 | template<std::float_round_style R> struct half_caster<half,half,R>
|
| 2105 | {
|
| 2106 | static half cast(half arg) { return arg; }
|
| 2107 | };
|
| 2108 | template<std::float_round_style R> struct half_caster<half,expr,R> : half_caster<half,half,R> {};
|
| 2109 |
|
| 2110 | /// \name Comparison operators
|
| 2111 | /// \{
|
| 2112 |
|
| 2113 | /// Comparison for equality.
|
| 2114 | /// \param x first operand
|
| 2115 | /// \param y second operand
|
| 2116 | /// \retval true if operands equal
|
| 2117 | /// \retval false else
|
| 2118 | template<typename T,typename U> typename enable<bool,T,U>::type operator==(T x, U y) { return functions::isequal(x, y); }
|
| 2119 |
|
| 2120 | /// Comparison for inequality.
|
| 2121 | /// \param x first operand
|
| 2122 | /// \param y second operand
|
| 2123 | /// \retval true if operands not equal
|
| 2124 | /// \retval false else
|
| 2125 | template<typename T,typename U> typename enable<bool,T,U>::type operator!=(T x, U y) { return functions::isnotequal(x, y); }
|
| 2126 |
|
| 2127 | /// Comparison for less than.
|
| 2128 | /// \param x first operand
|
| 2129 | /// \param y second operand
|
| 2130 | /// \retval true if \a x less than \a y
|
| 2131 | /// \retval false else
|
| 2132 | template<typename T,typename U> typename enable<bool,T,U>::type operator<(T x, U y) { return functions::isless(x, y); }
|
| 2133 |
|
| 2134 | /// Comparison for greater than.
|
| 2135 | /// \param x first operand
|
| 2136 | /// \param y second operand
|
| 2137 | /// \retval true if \a x greater than \a y
|
| 2138 | /// \retval false else
|
| 2139 | template<typename T,typename U> typename enable<bool,T,U>::type operator>(T x, U y) { return functions::isgreater(x, y); }
|
| 2140 |
|
| 2141 | /// Comparison for less equal.
|
| 2142 | /// \param x first operand
|
| 2143 | /// \param y second operand
|
| 2144 | /// \retval true if \a x less equal \a y
|
| 2145 | /// \retval false else
|
| 2146 | template<typename T,typename U> typename enable<bool,T,U>::type operator<=(T x, U y) { return functions::islessequal(x, y); }
|
| 2147 |
|
| 2148 | /// Comparison for greater equal.
|
| 2149 | /// \param x first operand
|
| 2150 | /// \param y second operand
|
| 2151 | /// \retval true if \a x greater equal \a y
|
| 2152 | /// \retval false else
|
| 2153 | template<typename T,typename U> typename enable<bool,T,U>::type operator>=(T x, U y) { return functions::isgreaterequal(x, y); }
|
| 2154 |
|
| 2155 | /// \}
|
| 2156 | /// \name Arithmetic operators
|
| 2157 | /// \{
|
| 2158 |
|
| 2159 | /// Add halfs.
|
| 2160 | /// \param x left operand
|
| 2161 | /// \param y right operand
|
| 2162 | /// \return sum of half expressions
|
| 2163 | template<typename T,typename U> typename enable<expr,T,U>::type operator+(T x, U y) { return functions::plus(x, y); }
|
| 2164 |
|
| 2165 | /// Subtract halfs.
|
| 2166 | /// \param x left operand
|
| 2167 | /// \param y right operand
|
| 2168 | /// \return difference of half expressions
|
| 2169 | template<typename T,typename U> typename enable<expr,T,U>::type operator-(T x, U y) { return functions::minus(x, y); }
|
| 2170 |
|
| 2171 | /// Multiply halfs.
|
| 2172 | /// \param x left operand
|
| 2173 | /// \param y right operand
|
| 2174 | /// \return product of half expressions
|
| 2175 | template<typename T,typename U> typename enable<expr,T,U>::type operator*(T x, U y) { return functions::multiplies(x, y); }
|
| 2176 |
|
| 2177 | /// Divide halfs.
|
| 2178 | /// \param x left operand
|
| 2179 | /// \param y right operand
|
| 2180 | /// \return quotient of half expressions
|
| 2181 | template<typename T,typename U> typename enable<expr,T,U>::type operator/(T x, U y) { return functions::divides(x, y); }
|
| 2182 |
|
| 2183 | /// Identity.
|
| 2184 | /// \param arg operand
|
| 2185 | /// \return uncahnged operand
|
| 2186 | template<typename T> HALF_CONSTEXPR typename enable<T,T>::type operator+(T arg) { return arg; }
|
| 2187 |
|
| 2188 | /// Negation.
|
| 2189 | /// \param arg operand
|
| 2190 | /// \return negated operand
|
| 2191 | template<typename T> HALF_CONSTEXPR typename enable<T,T>::type operator-(T arg) { return unary_specialized<T>::negate(arg); }
|
| 2192 |
|
| 2193 | /// \}
|
| 2194 | /// \name Input and output
|
| 2195 | /// \{
|
| 2196 |
|
| 2197 | /// Output operator.
|
| 2198 | /// \param out output stream to write into
|
| 2199 | /// \param arg half expression to write
|
| 2200 | /// \return reference to output stream
|
| 2201 | template<typename T,typename charT,typename traits> typename enable<std::basic_ostream<charT,traits>&,T>::type
|
| 2202 | operator<<(std::basic_ostream<charT,traits> &out, T arg) { return functions::write(out, arg); }
|
| 2203 |
|
| 2204 | /// Input operator.
|
| 2205 | /// \param in input stream to read from
|
| 2206 | /// \param arg half to read into
|
| 2207 | /// \return reference to input stream
|
| 2208 | template<typename charT,typename traits> std::basic_istream<charT,traits>&
|
| 2209 | operator>>(std::basic_istream<charT,traits> &in, half &arg) { return functions::read(in, arg); }
|
| 2210 |
|
| 2211 | /// \}
|
| 2212 | /// \name Basic mathematical operations
|
| 2213 | /// \{
|
| 2214 |
|
| 2215 | /// Absolute value.
|
| 2216 | /// \param arg operand
|
| 2217 | /// \return absolute value of \a arg
|
| 2218 | // template<typename T> typename enable<T,T>::type abs(T arg) { return unary_specialized<T>::fabs(arg); }
|
| 2219 | inline half abs(half arg) { return unary_specialized<half>::fabs(arg); }
|
| 2220 | inline expr abs(expr arg) { return unary_specialized<expr>::fabs(arg); }
|
| 2221 |
|
| 2222 | /// Absolute value.
|
| 2223 | /// \param arg operand
|
| 2224 | /// \return absolute value of \a arg
|
| 2225 | // template<typename T> typename enable<T,T>::type fabs(T arg) { return unary_specialized<T>::fabs(arg); }
|
| 2226 | inline half fabs(half arg) { return unary_specialized<half>::fabs(arg); }
|
| 2227 | inline expr fabs(expr arg) { return unary_specialized<expr>::fabs(arg); }
|
| 2228 |
|
| 2229 | /// Remainder of division.
|
| 2230 | /// \param x first operand
|
| 2231 | /// \param y second operand
|
| 2232 | /// \return remainder of floating point division.
|
| 2233 | // template<typename T,typename U> typename enable<expr,T,U>::type fmod(T x, U y) { return functions::fmod(x, y); }
|
| 2234 | inline expr fmod(half x, half y) { return functions::fmod(x, y); }
|
| 2235 | inline expr fmod(half x, expr y) { return functions::fmod(x, y); }
|
| 2236 | inline expr fmod(expr x, half y) { return functions::fmod(x, y); }
|
| 2237 | inline expr fmod(expr x, expr y) { return functions::fmod(x, y); }
|
| 2238 |
|
| 2239 | /// Remainder of division.
|
| 2240 | /// \param x first operand
|
| 2241 | /// \param y second operand
|
| 2242 | /// \return remainder of floating point division.
|
| 2243 | // template<typename T,typename U> typename enable<expr,T,U>::type remainder(T x, U y) { return functions::remainder(x, y); }
|
| 2244 | inline expr remainder(half x, half y) { return functions::remainder(x, y); }
|
| 2245 | inline expr remainder(half x, expr y) { return functions::remainder(x, y); }
|
| 2246 | inline expr remainder(expr x, half y) { return functions::remainder(x, y); }
|
| 2247 | inline expr remainder(expr x, expr y) { return functions::remainder(x, y); }
|
| 2248 |
|
| 2249 | /// Remainder of division.
|
| 2250 | /// \param x first operand
|
| 2251 | /// \param y second operand
|
| 2252 | /// \param quo address to store some bits of quotient at
|
| 2253 | /// \return remainder of floating point division.
|
| 2254 | // template<typename T,typename U> typename enable<expr,T,U>::type remquo(T x, U y, int *quo) { return functions::remquo(x, y, quo); }
|
| 2255 | inline expr remquo(half x, half y, int *quo) { return functions::remquo(x, y, quo); }
|
| 2256 | inline expr remquo(half x, expr y, int *quo) { return functions::remquo(x, y, quo); }
|
| 2257 | inline expr remquo(expr x, half y, int *quo) { return functions::remquo(x, y, quo); }
|
| 2258 | inline expr remquo(expr x, expr y, int *quo) { return functions::remquo(x, y, quo); }
|
| 2259 |
|
| 2260 | /// Fused multiply add.
|
| 2261 | /// \param x first operand
|
| 2262 | /// \param y second operand
|
| 2263 | /// \param z third operand
|
| 2264 | /// \return ( \a x * \a y ) + \a z rounded as one operation.
|
| 2265 | // template<typename T,typename U,typename V> typename enable<expr,T,U,V>::type fma(T x, U y, V z) { return functions::fma(x, y, z); }
|
| 2266 | inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); }
|
| 2267 | inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); }
|
| 2268 | inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); }
|
| 2269 | inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); }
|
| 2270 | inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); }
|
| 2271 | inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); }
|
| 2272 | inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); }
|
| 2273 | inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); }
|
| 2274 |
|
| 2275 | /// Maximum of half expressions. Each overload forwards to binary_specialized<T,U>::fmax for its operand types.
|
| 2276 | /// \param x first operand
|
| 2277 | /// \param y second operand
|
| 2278 | /// \return maximum of operands (declared as half when both operands are half, as expr for every other pairing)
|
| 2279 | // template<typename T,typename U> typename result<T,U>::type fmax(T x, U y) { return binary_specialized<T,U>::fmax(x, y); }
|
| 2280 | inline half fmax(half x, half y) { return binary_specialized<half,half>::fmax(x, y); }
|
| 2281 | inline expr fmax(half x, expr y) { return binary_specialized<half,expr>::fmax(x, y); }
|
| 2282 | inline expr fmax(expr x, half y) { return binary_specialized<expr,half>::fmax(x, y); }
|
| 2283 | inline expr fmax(expr x, expr y) { return binary_specialized<expr,expr>::fmax(x, y); }
|
| 2284 |
|
| 2285 | /// Minimum of half expressions. Each overload forwards to binary_specialized<T,U>::fmin for its operand types.
|
| 2286 | /// \param x first operand
|
| 2287 | /// \param y second operand
|
| 2288 | /// \return minimum of operands (declared as half when both operands are half, as expr for every other pairing)
|
| 2289 | // template<typename T,typename U> typename result<T,U>::type fmin(T x, U y) { return binary_specialized<T,U>::fmin(x, y); }
|
| 2290 | inline half fmin(half x, half y) { return binary_specialized<half,half>::fmin(x, y); }
|
| 2291 | inline expr fmin(half x, expr y) { return binary_specialized<half,expr>::fmin(x, y); }
|
| 2292 | inline expr fmin(expr x, half y) { return binary_specialized<expr,half>::fmin(x, y); }
|
| 2293 | inline expr fmin(expr x, expr y) { return binary_specialized<expr,expr>::fmin(x, y); }
|
| 2294 |
|
| 2295 | /// Positive difference. Explicit overloads for each half/expr operand pairing forward to functions::fdim.
|
| 2296 | /// \param x first operand
|
| 2297 | /// \param y second operand
|
| 2298 | /// \return \a x - \a y or 0 if difference negative
|
| 2299 | // template<typename T,typename U> typename enable<expr,T,U>::type fdim(T x, U y) { return functions::fdim(x, y); }
|
| 2300 | inline expr fdim(half x, half y) { return functions::fdim(x, y); }
|
| 2301 | inline expr fdim(half x, expr y) { return functions::fdim(x, y); }
|
| 2302 | inline expr fdim(expr x, half y) { return functions::fdim(x, y); }
|
| 2303 | inline expr fdim(expr x, expr y) { return functions::fdim(x, y); }
|
| 2304 |
|
| 2305 | /// Get NaN value. The string parameter is unnamed and is not passed on; the call goes to functions::nanh() with no arguments.
|
| 2306 | /// \return quiet NaN
|
| 2307 | inline half nanh(const char*) { return functions::nanh(); }
|
| 2308 |
|
| 2309 | /// \}
|
| 2310 | /// \name Exponential functions
|
| 2311 | /// \{
|
| 2312 |
|
| 2313 | /// Exponential function. The half and expr overloads both forward to functions::exp.
|
| 2314 | /// \param arg function argument
|
| 2315 | /// \return e raised to \a arg
|
| 2316 | // template<typename T> typename enable<expr,T>::type exp(T arg) { return functions::exp(arg); }
|
| 2317 | inline expr exp(half arg) { return functions::exp(arg); }
|
| 2318 | inline expr exp(expr arg) { return functions::exp(arg); }
|
| 2319 |
|
| 2320 | /// Exponential minus one. The half and expr overloads both forward to functions::expm1.
|
| 2321 | /// \param arg function argument
|
| 2322 | /// \return e raised to \a arg subtracted by 1
|
| 2323 | // template<typename T> typename enable<expr,T>::type expm1(T arg) { return functions::expm1(arg); }
|
| 2324 | inline expr expm1(half arg) { return functions::expm1(arg); }
|
| 2325 | inline expr expm1(expr arg) { return functions::expm1(arg); }
|
| 2326 |
|
| 2327 | /// Binary exponential. The half and expr overloads both forward to functions::exp2.
|
| 2328 | /// \param arg function argument
|
| 2329 | /// \return 2 raised to \a arg
|
| 2330 | // template<typename T> typename enable<expr,T>::type exp2(T arg) { return functions::exp2(arg); }
|
| 2331 | inline expr exp2(half arg) { return functions::exp2(arg); }
|
| 2332 | inline expr exp2(expr arg) { return functions::exp2(arg); }
|
| 2333 |
|
| 2334 | /// Natural logarithm. The half and expr overloads both forward to functions::log.
|
| 2335 | /// \param arg function argument
|
| 2336 | /// \return logarithm of \a arg to base e
|
| 2337 | // template<typename T> typename enable<expr,T>::type log(T arg) { return functions::log(arg); }
|
| 2338 | inline expr log(half arg) { return functions::log(arg); }
|
| 2339 | inline expr log(expr arg) { return functions::log(arg); }
|
| 2340 |
|
| 2341 | /// Common logarithm. The half and expr overloads both forward to functions::log10.
|
| 2342 | /// \param arg function argument
|
| 2343 | /// \return logarithm of \a arg to base 10
|
| 2344 | // template<typename T> typename enable<expr,T>::type log10(T arg) { return functions::log10(arg); }
|
| 2345 | inline expr log10(half arg) { return functions::log10(arg); }
|
| 2346 | inline expr log10(expr arg) { return functions::log10(arg); }
|
| 2347 |
|
| 2348 | /// Natural logarithm of one plus the argument. The half and expr overloads both forward to functions::log1p.
|
| 2349 | /// \param arg function argument
|
| 2350 | /// \return logarithm of \a arg plus 1 to base e
|
| 2351 | // template<typename T> typename enable<expr,T>::type log1p(T arg) { return functions::log1p(arg); }
|
| 2352 | inline expr log1p(half arg) { return functions::log1p(arg); }
|
| 2353 | inline expr log1p(expr arg) { return functions::log1p(arg); }
|
| 2354 |
|
| 2355 | /// Binary logarithm. The half and expr overloads both forward to functions::log2.
|
| 2356 | /// \param arg function argument
|
| 2357 | /// \return logarithm of \a arg to base 2
|
| 2358 | // template<typename T> typename enable<expr,T>::type log2(T arg) { return functions::log2(arg); }
|
| 2359 | inline expr log2(half arg) { return functions::log2(arg); }
|
| 2360 | inline expr log2(expr arg) { return functions::log2(arg); }
|
| 2361 |
|
| 2362 | /// \}
|
| 2363 | /// \name Power functions
|
| 2364 | /// \{
|
| 2365 |
|
| 2366 | /// Square root. The half and expr overloads both forward to functions::sqrt.
|
| 2367 | /// \param arg function argument
|
| 2368 | /// \return square root of \a arg
|
| 2369 | // template<typename T> typename enable<expr,T>::type sqrt(T arg) { return functions::sqrt(arg); }
|
| 2370 | inline expr sqrt(half arg) { return functions::sqrt(arg); }
|
| 2371 | inline expr sqrt(expr arg) { return functions::sqrt(arg); }
|
| 2372 |
|
| 2373 | /// Cubic root. The half and expr overloads both forward to functions::cbrt.
|
| 2374 | /// \param arg function argument
|
| 2375 | /// \return cubic root of \a arg
|
| 2376 | // template<typename T> typename enable<expr,T>::type cbrt(T arg) { return functions::cbrt(arg); }
|
| 2377 | inline expr cbrt(half arg) { return functions::cbrt(arg); }
|
| 2378 | inline expr cbrt(expr arg) { return functions::cbrt(arg); }
|
| 2379 |
|
| 2380 | /// Hypotenuse function. Explicit overloads for each half/expr argument pairing forward to functions::hypot.
|
| 2381 | /// \param x first argument
|
| 2382 | /// \param y second argument
|
| 2383 | /// \return square root of sum of squares without internal over- or underflows
|
| 2384 | // template<typename T,typename U> typename enable<expr,T,U>::type hypot(T x, U y) { return functions::hypot(x, y); }
|
| 2385 | inline expr hypot(half x, half y) { return functions::hypot(x, y); }
|
| 2386 | inline expr hypot(half x, expr y) { return functions::hypot(x, y); }
|
| 2387 | inline expr hypot(expr x, half y) { return functions::hypot(x, y); }
|
| 2388 | inline expr hypot(expr x, expr y) { return functions::hypot(x, y); }
|
| 2389 |
|
| 2390 | /// Power function. Explicit overloads for each half/expr argument pairing forward to functions::pow.
|
| 2391 | /// \param base first argument
|
| 2392 | /// \param exp second argument
|
| 2393 | /// \return \a base raised to \a exp
|
| 2394 | // template<typename T,typename U> typename enable<expr,T,U>::type pow(T base, U exp) { return functions::pow(base, exp); }
|
| 2395 | inline expr pow(half base, half exp) { return functions::pow(base, exp); }
|
| 2396 | inline expr pow(half base, expr exp) { return functions::pow(base, exp); }
|
| 2397 | inline expr pow(expr base, half exp) { return functions::pow(base, exp); }
|
| 2398 | inline expr pow(expr base, expr exp) { return functions::pow(base, exp); }
|
| 2399 |
|
| 2400 | /// \}
|
| 2401 | /// \name Trigonometric functions
|
| 2402 | /// \{
|
| 2403 |
|
| 2404 | /// Sine function. The half and expr overloads both forward to functions::sin.
|
| 2405 | /// \param arg function argument
|
| 2406 | /// \return sine value of \a arg
|
| 2407 | // template<typename T> typename enable<expr,T>::type sin(T arg) { return functions::sin(arg); }
|
| 2408 | inline expr sin(half arg) { return functions::sin(arg); }
|
| 2409 | inline expr sin(expr arg) { return functions::sin(arg); }
|
| 2410 |
|
| 2411 | /// Cosine function. The half and expr overloads both forward to functions::cos.
|
| 2412 | /// \param arg function argument
|
| 2413 | /// \return cosine value of \a arg
|
| 2414 | // template<typename T> typename enable<expr,T>::type cos(T arg) { return functions::cos(arg); }
|
| 2415 | inline expr cos(half arg) { return functions::cos(arg); }
|
| 2416 | inline expr cos(expr arg) { return functions::cos(arg); }
|
| 2417 |
|
| 2418 | /// Tangent function. The half and expr overloads both forward to functions::tan.
|
| 2419 | /// \param arg function argument
|
| 2420 | /// \return tangent value of \a arg
|
| 2421 | // template<typename T> typename enable<expr,T>::type tan(T arg) { return functions::tan(arg); }
|
| 2422 | inline expr tan(half arg) { return functions::tan(arg); }
|
| 2423 | inline expr tan(expr arg) { return functions::tan(arg); }
|
| 2424 |
|
| 2425 | /// Arc sine. The half and expr overloads both forward to functions::asin.
|
| 2426 | /// \param arg function argument
|
| 2427 | /// \return arc sine value of \a arg
|
| 2428 | // template<typename T> typename enable<expr,T>::type asin(T arg) { return functions::asin(arg); }
|
| 2429 | inline expr asin(half arg) { return functions::asin(arg); }
|
| 2430 | inline expr asin(expr arg) { return functions::asin(arg); }
|
| 2431 |
|
| 2432 | /// Arc cosine function. The half and expr overloads both forward to functions::acos.
|
| 2433 | /// \param arg function argument
|
| 2434 | /// \return arc cosine value of \a arg
|
| 2435 | // template<typename T> typename enable<expr,T>::type acos(T arg) { return functions::acos(arg); }
|
| 2436 | inline expr acos(half arg) { return functions::acos(arg); }
|
| 2437 | inline expr acos(expr arg) { return functions::acos(arg); }
|
| 2438 |
|
| 2439 | /// Arc tangent function (single argument). The half and expr overloads both forward to functions::atan.
|
| 2440 | /// \param arg function argument
|
| 2441 | /// \return arc tangent value of \a arg
|
| 2442 | // template<typename T> typename enable<expr,T>::type atan(T arg) { return functions::atan(arg); }
|
| 2443 | inline expr atan(half arg) { return functions::atan(arg); }
|
| 2444 | inline expr atan(expr arg) { return functions::atan(arg); }
|
| 2445 |
|
| 2446 | /// Arc tangent function (two arguments). Explicit overloads for each half/expr argument pairing forward to functions::atan2 in declared order.
|
| 2447 | /// \param x first argument
|
| 2448 | /// \param y second argument
|
| 2449 | /// \return arc tangent value (NOTE(review): std::atan2 takes its arguments as (y, x); confirm which coordinate \a x denotes here)
|
| 2450 | // template<typename T,typename U> typename enable<expr,T,U>::type atan2(T x, U y) { return functions::atan2(x, y); }
|
| 2451 | inline expr atan2(half x, half y) { return functions::atan2(x, y); }
|
| 2452 | inline expr atan2(half x, expr y) { return functions::atan2(x, y); }
|
| 2453 | inline expr atan2(expr x, half y) { return functions::atan2(x, y); }
|
| 2454 | inline expr atan2(expr x, expr y) { return functions::atan2(x, y); }
|
| 2455 |
|
| 2456 | /// \}
|
| 2457 | /// \name Hyperbolic functions
|
| 2458 | /// \{
|
| 2459 |
|
| 2460 | /// Hyperbolic sine. The half and expr overloads both forward to functions::sinh.
|
| 2461 | /// \param arg function argument
|
| 2462 | /// \return hyperbolic sine value of \a arg
|
| 2463 | // template<typename T> typename enable<expr,T>::type sinh(T arg) { return functions::sinh(arg); }
|
| 2464 | inline expr sinh(half arg) { return functions::sinh(arg); }
|
| 2465 | inline expr sinh(expr arg) { return functions::sinh(arg); }
|
| 2466 |
|
| 2467 | /// Hyperbolic cosine. The half and expr overloads both forward to functions::cosh.
|
| 2468 | /// \param arg function argument
|
| 2469 | /// \return hyperbolic cosine value of \a arg
|
| 2470 | // template<typename T> typename enable<expr,T>::type cosh(T arg) { return functions::cosh(arg); }
|
| 2471 | inline expr cosh(half arg) { return functions::cosh(arg); }
|
| 2472 | inline expr cosh(expr arg) { return functions::cosh(arg); }
|
| 2473 |
|
| 2474 | /// Hyperbolic tangent. The half and expr overloads both forward to functions::tanh.
|
| 2475 | /// \param arg function argument
|
| 2476 | /// \return hyperbolic tangent value of \a arg
|
| 2477 | // template<typename T> typename enable<expr,T>::type tanh(T arg) { return functions::tanh(arg); }
|
| 2478 | inline expr tanh(half arg) { return functions::tanh(arg); }
|
| 2479 | inline expr tanh(expr arg) { return functions::tanh(arg); }
|
| 2480 |
|
| 2481 | /// Hyperbolic area sine. The half and expr overloads both forward to functions::asinh.
|
| 2482 | /// \param arg function argument
|
| 2483 | /// \return area sine value of \a arg
|
| 2484 | // template<typename T> typename enable<expr,T>::type asinh(T arg) { return functions::asinh(arg); }
|
| 2485 | inline expr asinh(half arg) { return functions::asinh(arg); }
|
| 2486 | inline expr asinh(expr arg) { return functions::asinh(arg); }
|
| 2487 |
|
| 2488 | /// Hyperbolic area cosine. The half and expr overloads both forward to functions::acosh.
|
| 2489 | /// \param arg function argument
|
| 2490 | /// \return area cosine value of \a arg
|
| 2491 | // template<typename T> typename enable<expr,T>::type acosh(T arg) { return functions::acosh(arg); }
|
| 2492 | inline expr acosh(half arg) { return functions::acosh(arg); }
|
| 2493 | inline expr acosh(expr arg) { return functions::acosh(arg); }
|
| 2494 |
|
| 2495 | /// Hyperbolic area tangent. The half and expr overloads both forward to functions::atanh.
|
| 2496 | /// \param arg function argument
|
| 2497 | /// \return area tangent value of \a arg
|
| 2498 | // template<typename T> typename enable<expr,T>::type atanh(T arg) { return functions::atanh(arg); }
|
| 2499 | inline expr atanh(half arg) { return functions::atanh(arg); }
|
| 2500 | inline expr atanh(expr arg) { return functions::atanh(arg); }
|
| 2501 |
|
| 2502 | /// \}
|
| 2503 | /// \name Error and gamma functions
|
| 2504 | /// \{
|
| 2505 |
|
| 2506 | /// Error function. The half and expr overloads both forward to functions::erf.
|
| 2507 | /// \param arg function argument
|
| 2508 | /// \return error function value of \a arg
|
| 2509 | // template<typename T> typename enable<expr,T>::type erf(T arg) { return functions::erf(arg); }
|
| 2510 | inline expr erf(half arg) { return functions::erf(arg); }
|
| 2511 | inline expr erf(expr arg) { return functions::erf(arg); }
|
| 2512 |
|
| 2513 | /// Complementary error function. The half and expr overloads both forward to functions::erfc.
|
| 2514 | /// \param arg function argument
|
| 2515 | /// \return 1 minus error function value of \a arg
|
| 2516 | // template<typename T> typename enable<expr,T>::type erfc(T arg) { return functions::erfc(arg); }
|
| 2517 | inline expr erfc(half arg) { return functions::erfc(arg); }
|
| 2518 | inline expr erfc(expr arg) { return functions::erfc(arg); }
|
| 2519 |
|
| 2520 | /// Natural logarithm of gamma function. The half and expr overloads both forward to functions::lgamma.
|
| 2521 | /// \param arg function argument
|
| 2522 | /// \return natural logarithm of gamma function for \a arg
|
| 2523 | // template<typename T> typename enable<expr,T>::type lgamma(T arg) { return functions::lgamma(arg); }
|
| 2524 | inline expr lgamma(half arg) { return functions::lgamma(arg); }
|
| 2525 | inline expr lgamma(expr arg) { return functions::lgamma(arg); }
|
| 2526 |
|
| 2527 | /// Gamma function. The half and expr overloads both forward to functions::tgamma.
|
| 2528 | /// \param arg function argument
|
| 2529 | /// \return gamma function value of \a arg
|
| 2530 | // template<typename T> typename enable<expr,T>::type tgamma(T arg) { return functions::tgamma(arg); }
|
| 2531 | inline expr tgamma(half arg) { return functions::tgamma(arg); }
|
| 2532 | inline expr tgamma(expr arg) { return functions::tgamma(arg); }
|
| 2533 |
|
| 2534 | /// \}
|
| 2535 | /// \name Rounding
|
| 2536 | /// \{
|
| 2537 |
|
| 2538 | /// Nearest integer not less than half value. The half and expr overloads both return half and forward to functions::ceil.
|
| 2539 | /// \param arg half to round
|
| 2540 | /// \return nearest integer not less than \a arg
|
| 2541 | // template<typename T> typename enable<half,T>::type ceil(T arg) { return functions::ceil(arg); }
|
| 2542 | inline half ceil(half arg) { return functions::ceil(arg); }
|
| 2543 | inline half ceil(expr arg) { return functions::ceil(arg); }
|
| 2544 |
|
| 2545 | /// Nearest integer not greater than half value. The half and expr overloads both return half and forward to functions::floor.
|
| 2546 | /// \param arg half to round
|
| 2547 | /// \return nearest integer not greater than \a arg
|
| 2548 | // template<typename T> typename enable<half,T>::type floor(T arg) { return functions::floor(arg); }
|
| 2549 | inline half floor(half arg) { return functions::floor(arg); }
|
| 2550 | inline half floor(expr arg) { return functions::floor(arg); }
|
| 2551 |
|
| 2552 | /// Nearest integer not greater in magnitude than half value. The half and expr overloads both return half and forward to functions::trunc.
|
| 2553 | /// \param arg half to round
|
| 2554 | /// \return nearest integer not greater in magnitude than \a arg
|
| 2555 | // template<typename T> typename enable<half,T>::type trunc(T arg) { return functions::trunc(arg); }
|
| 2556 | inline half trunc(half arg) { return functions::trunc(arg); }
|
| 2557 | inline half trunc(expr arg) { return functions::trunc(arg); }
|
| 2558 |
|
| 2559 | /// Nearest integer, returned as half. The half and expr overloads both forward to functions::round.
|
| 2560 | /// \param arg half to round
|
| 2561 | /// \return nearest integer, rounded away from zero in half-way cases
|
| 2562 | // template<typename T> typename enable<half,T>::type round(T arg) { return functions::round(arg); }
|
| 2563 | inline half round(half arg) { return functions::round(arg); }
|
| 2564 | inline half round(expr arg) { return functions::round(arg); }
|
| 2565 |
|
| 2566 | /// Nearest integer, returned as long. The half and expr overloads both forward to functions::lround.
|
| 2567 | /// \param arg half to round
|
| 2568 | /// \return nearest integer, rounded away from zero in half-way cases
|
| 2569 | // template<typename T> typename enable<long,T>::type lround(T arg) { return functions::lround(arg); }
|
| 2570 | inline long lround(half arg) { return functions::lround(arg); }
|
| 2571 | inline long lround(expr arg) { return functions::lround(arg); }
|
| 2572 |
|
| 2573 | /// Nearest integer using half's internal rounding mode. Both overloads forward to functions::rint, the same function the rint overloads call.
|
| 2574 | /// \param arg half expression to round
|
| 2575 | /// \return nearest integer using default rounding mode
|
| 2576 | // template<typename T> typename enable<half,T>::type nearbyint(T arg) { return functions::nearbyint(arg); }
|
| 2577 | inline half nearbyint(half arg) { return functions::rint(arg); }
|
| 2578 | inline half nearbyint(expr arg) { return functions::rint(arg); }
|
| 2579 |
|
| 2580 | /// Nearest integer using half's internal rounding mode, returned as half. The half and expr overloads both forward to functions::rint.
|
| 2581 | /// \param arg half expression to round
|
| 2582 | /// \return nearest integer using default rounding mode
|
| 2583 | // template<typename T> typename enable<half,T>::type rint(T arg) { return functions::rint(arg); }
|
| 2584 | inline half rint(half arg) { return functions::rint(arg); }
|
| 2585 | inline half rint(expr arg) { return functions::rint(arg); }
|
| 2586 |
|
| 2587 | /// Nearest integer using half's internal rounding mode, returned as long. The half and expr overloads both forward to functions::lrint.
|
| 2588 | /// \param arg half expression to round
|
| 2589 | /// \return nearest integer using default rounding mode
|
| 2590 | // template<typename T> typename enable<long,T>::type lrint(T arg) { return functions::lrint(arg); }
|
| 2591 | inline long lrint(half arg) { return functions::lrint(arg); }
|
| 2592 | inline long lrint(expr arg) { return functions::lrint(arg); }
|
| 2593 | #if HALF_ENABLE_CPP11_LONG_LONG
|
| 2594 | /// Nearest integer, returned as long long. Only declared when HALF_ENABLE_CPP11_LONG_LONG is set; both overloads forward to functions::llround.
|
| 2595 | /// \param arg half to round
|
| 2596 | /// \return nearest integer, rounded away from zero in half-way cases
|
| 2597 | // template<typename T> typename enable<long long,T>::type llround(T arg) { return functions::llround(arg); }
|
| 2598 | inline long long llround(half arg) { return functions::llround(arg); }
|
| 2599 | inline long long llround(expr arg) { return functions::llround(arg); }
|
| 2600 |
|
| 2601 | /// Nearest integer using half's internal rounding mode, returned as long long. Only declared when HALF_ENABLE_CPP11_LONG_LONG is set; both overloads forward to functions::llrint.
|
| 2602 | /// \param arg half expression to round
|
| 2603 | /// \return nearest integer using default rounding mode
|
| 2604 | // template<typename T> typename enable<long long,T>::type llrint(T arg) { return functions::llrint(arg); }
|
| 2605 | inline long long llrint(half arg) { return functions::llrint(arg); }
|
| 2606 | inline long long llrint(expr arg) { return functions::llrint(arg); }
|
| 2607 | #endif
|
| 2608 |
|
| 2609 | /// \}
|
| 2610 | /// \name Floating point manipulation
|
| 2611 | /// \{
|
| 2612 |
|
| 2613 | /// Decompress floating point number. The half and expr overloads both forward to functions::frexp.
|
| 2614 | /// \param arg number to decompress
|
| 2615 | /// \param exp address to store exponent at
|
| 2616 | /// \return significand in range [0.5, 1)
|
| 2617 | // template<typename T> typename enable<half,T>::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); }
|
| 2618 | inline half frexp(half arg, int *exp) { return functions::frexp(arg, exp); }
|
| 2619 | inline half frexp(expr arg, int *exp) { return functions::frexp(arg, exp); }
|
| 2620 |
|
| 2621 | /// Multiply by power of two (int exponent). Both overloads forward to functions::scalbln, as scalbn and scalbln also do.
|
| 2622 | /// \param arg number to modify
|
| 2623 | /// \param exp power of two to multiply with
|
| 2624 | /// \return \a arg multiplied by 2 raised to \a exp
|
| 2625 | // template<typename T> typename enable<half,T>::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2626 | inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2627 | inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2628 |
|
| 2629 | /// Extract integer and fractional parts. The half and expr overloads both forward to functions::modf.
|
| 2630 | /// \param arg number to decompress
|
| 2631 | /// \param iptr address to store integer part at
|
| 2632 | /// \return fractional part
|
| 2633 | // template<typename T> typename enable<half,T>::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); }
|
| 2634 | inline half modf(half arg, half *iptr) { return functions::modf(arg, iptr); }
|
| 2635 | inline half modf(expr arg, half *iptr) { return functions::modf(arg, iptr); }
|
| 2636 |
|
| 2637 | /// Multiply by power of two (int exponent). Both overloads forward to functions::scalbln, as ldexp and scalbln also do.
|
| 2638 | /// \param arg number to modify
|
| 2639 | /// \param exp power of two to multiply with
|
| 2640 | /// \return \a arg multiplied by 2 raised to \a exp
|
| 2641 | // template<typename T> typename enable<half,T>::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2642 | inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2643 | inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
| 2644 |
|
| 2645 | /// Multiply by power of two (long exponent). The half and expr overloads both forward to functions::scalbln.
|
| 2646 | /// \param arg number to modify
|
| 2647 | /// \param exp power of two to multiply with
|
| 2648 | /// \return \a arg multiplied by 2 raised to \a exp
|
| 2649 | // template<typename T> typename enable<half,T>::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); }
|
| 2650 | inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); }
|
| 2651 | inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); }
|
| 2652 |
|
| 2653 | /// Extract exponent as an int. The half and expr overloads both forward to functions::ilogb.
|
| 2654 | /// \param arg number to query
|
| 2655 | /// \return floating point exponent
|
| 2656 | /// \retval FP_ILOGB0 for zero
|
| 2657 | /// \retval FP_ILOGBNAN for NaN
|
| 2658 | /// \retval INT_MAX for infinity
|
| 2659 | // template<typename T> typename enable<int,T>::type ilogb(T arg) { return functions::ilogb(arg); }
|
| 2660 | inline int ilogb(half arg) { return functions::ilogb(arg); }
|
| 2661 | inline int ilogb(expr arg) { return functions::ilogb(arg); }
|
| 2662 |
|
| 2663 | /// Extract exponent as a half. The half and expr overloads both forward to functions::logb.
|
| 2664 | /// \param arg number to query
|
| 2665 | /// \return floating point exponent
|
| 2666 | // template<typename T> typename enable<half,T>::type logb(T arg) { return functions::logb(arg); }
|
| 2667 | inline half logb(half arg) { return functions::logb(arg); }
|
| 2668 | inline half logb(expr arg) { return functions::logb(arg); }
|
| 2669 |
|
| 2670 | /// Next representable value (half/expr direction). Explicit overloads for each half/expr argument pairing forward to functions::nextafter.
|
| 2671 | /// \param from value to compute next representable value for
|
| 2672 | /// \param to direction towards which to compute next value
|
| 2673 | /// \return next representable value after \a from in direction towards \a to
|
| 2674 | // template<typename T,typename U> typename enable<half,T,U>::type nextafter(T from, U to) { return functions::nextafter(from, to); }
|
| 2675 | inline half nextafter(half from, half to) { return functions::nextafter(from, to); }
|
| 2676 | inline half nextafter(half from, expr to) { return functions::nextafter(from, to); }
|
| 2677 | inline half nextafter(expr from, half to) { return functions::nextafter(from, to); }
|
| 2678 | inline half nextafter(expr from, expr to) { return functions::nextafter(from, to); }
|
| 2679 |
|
| 2680 | /// Next representable value (long double direction). The half and expr overloads both forward to functions::nexttoward.
|
| 2681 | /// \param from value to compute next representable value for
|
| 2682 | /// \param to direction towards which to compute next value
|
| 2683 | /// \return next representable value after \a from in direction towards \a to
|
| 2684 | // template<typename T> typename enable<half,T>::type nexttoward(T from, long double to) { return functions::nexttoward(from, to); }
|
| 2685 | inline half nexttoward(half from, long double to) { return functions::nexttoward(from, to); }
|
| 2686 | inline half nexttoward(expr from, long double to) { return functions::nexttoward(from, to); }
|
| 2687 |
|
| 2688 | /// Take sign. Explicit overloads for each half/expr argument pairing return half and forward to functions::copysign.
|
| 2689 | /// \param x value to change sign for
|
| 2690 | /// \param y value to take sign from
|
| 2691 | /// \return value equal to \a x in magnitude and to \a y in sign
|
| 2692 | // template<typename T,typename U> typename enable<half,T,U>::type copysign(T x, U y) { return functions::copysign(x, y); }
|
| 2693 | inline half copysign(half x, half y) { return functions::copysign(x, y); }
|
| 2694 | inline half copysign(half x, expr y) { return functions::copysign(x, y); }
|
| 2695 | inline half copysign(expr x, half y) { return functions::copysign(x, y); }
|
| 2696 | inline half copysign(expr x, expr y) { return functions::copysign(x, y); }
|
| 2697 |
|
| 2698 | /// \}
|
| 2699 | /// \name Floating point classification
|
| 2700 | /// \{
|
| 2701 |
|
| 2702 |
|
| 2703 | /// Classify floating point value. The half and expr overloads both forward to functions::fpclassify.
|
| 2704 | /// \param arg number to classify
|
| 2705 | /// \retval FP_ZERO for positive and negative zero
|
| 2706 | /// \retval FP_SUBNORMAL for subnormal numbers
|
| 2707 | /// \retval FP_INFINITE for positive and negative infinity
|
| 2708 | /// \retval FP_NAN for NaNs
|
| 2709 | /// \retval FP_NORMAL for all other (normal) values
|
| 2710 | // template<typename T> typename enable<int,T>::type fpclassify(T arg) { return functions::fpclassify(arg); }
|
| 2711 | inline int fpclassify(half arg) { return functions::fpclassify(arg); }
|
| 2712 | inline int fpclassify(expr arg) { return functions::fpclassify(arg); }
|
| 2713 |
|
| 2714 | /// Check if finite number. The half and expr overloads both forward to functions::isfinite.
|
| 2715 | /// \param arg number to check
|
| 2716 | /// \retval true if neither infinity nor NaN
|
| 2717 | /// \retval false else
|
| 2718 | // template<typename T> typename enable<bool,T>::type isfinite(T arg) { return functions::isfinite(arg); }
|
| 2719 | inline bool isfinite(half arg) { return functions::isfinite(arg); }
|
| 2720 | inline bool isfinite(expr arg) { return functions::isfinite(arg); }
|
| 2721 |
|
| 2722 | /// Check for infinity. The half and expr overloads both forward to functions::isinf.
|
| 2723 | /// \param arg number to check
|
| 2724 | /// \retval true for positive or negative infinity
|
| 2725 | /// \retval false else
|
| 2726 | // template<typename T> typename enable<bool,T>::type isinf(T arg) { return functions::isinf(arg); }
|
| 2727 | inline bool isinf(half arg) { return functions::isinf(arg); }
|
| 2728 | inline bool isinf(expr arg) { return functions::isinf(arg); }
|
| 2729 |
|
| 2730 | /// Check for NaN. The half and expr overloads both forward to functions::isnan.
|
| 2731 | /// \param arg number to check
|
| 2732 | /// \retval true for NaNs
|
| 2733 | /// \retval false else
|
| 2734 | // template<typename T> typename enable<bool,T>::type isnan(T arg) { return functions::isnan(arg); }
|
| 2735 | inline bool isnan(half arg) { return functions::isnan(arg); }
|
| 2736 | inline bool isnan(expr arg) { return functions::isnan(arg); }
|
| 2737 |
|
| 2738 | /// Check if normal number. The half and expr overloads both forward to functions::isnormal.
|
| 2739 | /// \param arg number to check
|
| 2740 | /// \retval true if normal number
|
| 2741 | /// \retval false if either subnormal, zero, infinity or NaN
|
| 2742 | // template<typename T> typename enable<bool,T>::type isnormal(T arg) { return functions::isnormal(arg); }
|
| 2743 | inline bool isnormal(half arg) { return functions::isnormal(arg); }
|
| 2744 | inline bool isnormal(expr arg) { return functions::isnormal(arg); }
|
| 2745 |
|
| 2746 | /// Check sign. The half and expr overloads both forward to functions::signbit.
|
| 2747 | /// \param arg number to check
|
| 2748 | /// \retval true for negative number
|
| 2749 | /// \retval false for positive number
|
| 2750 | // template<typename T> typename enable<bool,T>::type signbit(T arg) { return functions::signbit(arg); }
|
| 2751 | inline bool signbit(half arg) { return functions::signbit(arg); }
|
| 2752 | inline bool signbit(expr arg) { return functions::signbit(arg); }
|
| 2753 |
|
| 2754 | /// \}
|
| 2755 | /// \name Comparison
|
| 2756 | /// \{
|
| 2757 |
|
| 2758 | /// Comparison for greater than. Explicit overloads for each half/expr operand pairing forward to functions::isgreater.
|
| 2759 | /// \param x first operand
|
| 2760 | /// \param y second operand
|
| 2761 | /// \retval true if \a x greater than \a y
|
| 2762 | /// \retval false else
|
| 2763 | // template<typename T,typename U> typename enable<bool,T,U>::type isgreater(T x, U y) { return functions::isgreater(x, y); }
|
| 2764 | inline bool isgreater(half x, half y) { return functions::isgreater(x, y); }
|
| 2765 | inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); }
|
| 2766 | inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); }
|
| 2767 | inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); }
|
| 2768 |
|
| 2769 | /// Comparison for greater equal. Explicit overloads for each half/expr operand pairing forward to functions::isgreaterequal.
|
| 2770 | /// \param x first operand
|
| 2771 | /// \param y second operand
|
| 2772 | /// \retval true if \a x greater equal \a y
|
| 2773 | /// \retval false else
|
| 2774 | // template<typename T,typename U> typename enable<bool,T,U>::type isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); }
|
| 2775 | inline bool isgreaterequal(half x, half y) { return functions::isgreaterequal(x, y); }
|
| 2776 | inline bool isgreaterequal(half x, expr y) { return functions::isgreaterequal(x, y); }
|
| 2777 | inline bool isgreaterequal(expr x, half y) { return functions::isgreaterequal(x, y); }
|
| 2778 | inline bool isgreaterequal(expr x, expr y) { return functions::isgreaterequal(x, y); }
|
| 2779 |
|
| 2780 | /// Comparison for less than. Explicit overloads for each half/expr operand pairing forward to functions::isless.
|
| 2781 | /// \param x first operand
|
| 2782 | /// \param y second operand
|
| 2783 | /// \retval true if \a x less than \a y
|
| 2784 | /// \retval false else
|
| 2785 | // template<typename T,typename U> typename enable<bool,T,U>::type isless(T x, U y) { return functions::isless(x, y); }
|
| 2786 | inline bool isless(half x, half y) { return functions::isless(x, y); }
|
| 2787 | inline bool isless(half x, expr y) { return functions::isless(x, y); }
|
| 2788 | inline bool isless(expr x, half y) { return functions::isless(x, y); }
|
| 2789 | inline bool isless(expr x, expr y) { return functions::isless(x, y); }
|
| 2790 |
|
| 2791 | /// Comparison for less equal. Explicit overloads for each half/expr operand pairing forward to functions::islessequal.
|
| 2792 | /// \param x first operand
|
| 2793 | /// \param y second operand
|
| 2794 | /// \retval true if \a x less equal \a y
|
| 2795 | /// \retval false else
|
| 2796 | // template<typename T,typename U> typename enable<bool,T,U>::type islessequal(T x, U y) { return functions::islessequal(x, y); }
|
| 2797 | inline bool islessequal(half x, half y) { return functions::islessequal(x, y); }
|
| 2798 | inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); }
|
| 2799 | inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); }
|
| 2800 | inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); }
|
| 2801 |
|
| 2802 | /// Comparison for less or greater. Explicit overloads for each half/expr operand pairing forward to functions::islessgreater.
|
| 2803 | /// \param x first operand
|
| 2804 | /// \param y second operand
|
| 2805 | /// \retval true if either less or greater
|
| 2806 | /// \retval false else
|
| 2807 | // template<typename T,typename U> typename enable<bool,T,U>::type islessgreater(T x, U y) { return functions::islessgreater(x, y); }
|
| 2808 | inline bool islessgreater(half x, half y) { return functions::islessgreater(x, y); }
|
| 2809 | inline bool islessgreater(half x, expr y) { return functions::islessgreater(x, y); }
|
| 2810 | inline bool islessgreater(expr x, half y) { return functions::islessgreater(x, y); }
|
| 2811 | inline bool islessgreater(expr x, expr y) { return functions::islessgreater(x, y); }
|
| 2812 |
|
| 2813 | /// Check if unordered. Explicit overloads for each half/expr operand pairing forward to functions::isunordered.
|
| 2814 | /// \param x first operand
|
| 2815 | /// \param y second operand
|
| 2816 | /// \retval true if unordered (one or two NaN operands)
|
| 2817 | /// \retval false else
|
| 2818 | // template<typename T,typename U> typename enable<bool,T,U>::type isunordered(T x, U y) { return functions::isunordered(x, y); }
|
| 2819 | inline bool isunordered(half x, half y) { return functions::isunordered(x, y); }
|
| 2820 | inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); }
|
| 2821 | inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); }
|
| 2822 | inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); }
|
| 2823 | /// \}
|
| 2824 | /// \name Casting
|
| 2825 | /// \{
|
| 2826 |
|
| 2827 | /// Cast to or from half-precision floating point number.
|
| 2828 | /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
|
| 2829 | /// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
|
| 2830 | /// The conversion itself is delegated to half_caster<T,U>::cast.
|
| 2831 | ///
|
| 2832 | /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
|
| 2833 | /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
|
| 2834 | /// error and casting between [half](\ref half_float::half)s is just a no-op.
|
| 2835 | /// \tparam T destination type (half or built-in arithmetic type)
|
| 2836 | /// \tparam U source type (half or built-in arithmetic type)
|
| 2837 | /// \param arg value to cast
|
| 2838 | /// \return \a arg converted to destination type
|
| 2839 | template<typename T,typename U> T half_cast(U arg) { return half_caster<T,U>::cast(arg); }
|
| 2840 |
|
| 2841 | /// Cast to or from half-precision floating point number.
|
| 2842 | /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
|
| 2843 | /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
|
| 2844 | ///
|
| 2845 | /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
|
| 2846 | /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
|
| 2847 | /// error and casting between [half](\ref half_float::half)s is just a no-op.
|
| 2848 | /// \tparam T destination type (half or built-in arithmetic type)
|
| 2849 | /// \tparam R rounding mode to use.
|
| 2850 | /// \tparam U source type (half or built-in arithmetic type)
|
| 2851 | /// \param arg value to cast
|
| 2852 | /// \return \a arg converted to destination type
|
| 2853 | template<typename T,std::float_round_style R,typename U> T half_cast(U arg) { return half_caster<T,U,R>::cast(arg); }
|
| 2854 | /// \}
|
| 2855 | }
|
| 2856 |
|
| 2857 | using detail::operator==;
|
| 2858 | using detail::operator!=;
|
| 2859 | using detail::operator<;
|
| 2860 | using detail::operator>;
|
| 2861 | using detail::operator<=;
|
| 2862 | using detail::operator>=;
|
| 2863 | using detail::operator+;
|
| 2864 | using detail::operator-;
|
| 2865 | using detail::operator*;
|
| 2866 | using detail::operator/;
|
| 2867 | using detail::operator<<;
|
| 2868 | using detail::operator>>;
|
| 2869 |
|
| 2870 | using detail::abs;
|
| 2871 | using detail::fabs;
|
| 2872 | using detail::fmod;
|
| 2873 | using detail::remainder;
|
| 2874 | using detail::remquo;
|
| 2875 | using detail::fma;
|
| 2876 | using detail::fmax;
|
| 2877 | using detail::fmin;
|
| 2878 | using detail::fdim;
|
| 2879 | using detail::nanh;
|
| 2880 | using detail::exp;
|
| 2881 | using detail::expm1;
|
| 2882 | using detail::exp2;
|
| 2883 | using detail::log;
|
| 2884 | using detail::log10;
|
| 2885 | using detail::log1p;
|
| 2886 | using detail::log2;
|
| 2887 | using detail::sqrt;
|
| 2888 | using detail::cbrt;
|
| 2889 | using detail::hypot;
|
| 2890 | using detail::pow;
|
| 2891 | using detail::sin;
|
| 2892 | using detail::cos;
|
| 2893 | using detail::tan;
|
| 2894 | using detail::asin;
|
| 2895 | using detail::acos;
|
| 2896 | using detail::atan;
|
| 2897 | using detail::atan2;
|
| 2898 | using detail::sinh;
|
| 2899 | using detail::cosh;
|
| 2900 | using detail::tanh;
|
| 2901 | using detail::asinh;
|
| 2902 | using detail::acosh;
|
| 2903 | using detail::atanh;
|
| 2904 | using detail::erf;
|
| 2905 | using detail::erfc;
|
| 2906 | using detail::lgamma;
|
| 2907 | using detail::tgamma;
|
| 2908 | using detail::ceil;
|
| 2909 | using detail::floor;
|
| 2910 | using detail::trunc;
|
| 2911 | using detail::round;
|
| 2912 | using detail::lround;
|
| 2913 | using detail::nearbyint;
|
| 2914 | using detail::rint;
|
| 2915 | using detail::lrint;
|
| 2916 | #if HALF_ENABLE_CPP11_LONG_LONG
|
| 2917 | using detail::llround;
|
| 2918 | using detail::llrint;
|
| 2919 | #endif
|
| 2920 | using detail::frexp;
|
| 2921 | using detail::ldexp;
|
| 2922 | using detail::modf;
|
| 2923 | using detail::scalbn;
|
| 2924 | using detail::scalbln;
|
| 2925 | using detail::ilogb;
|
| 2926 | using detail::logb;
|
| 2927 | using detail::nextafter;
|
| 2928 | using detail::nexttoward;
|
| 2929 | using detail::copysign;
|
| 2930 | using detail::fpclassify;
|
| 2931 | using detail::isfinite;
|
| 2932 | using detail::isinf;
|
| 2933 | using detail::isnan;
|
| 2934 | using detail::isnormal;
|
| 2935 | using detail::signbit;
|
| 2936 | using detail::isgreater;
|
| 2937 | using detail::isgreaterequal;
|
| 2938 | using detail::isless;
|
| 2939 | using detail::islessequal;
|
| 2940 | using detail::islessgreater;
|
| 2941 | using detail::isunordered;
|
| 2942 |
|
| 2943 | using detail::half_cast;
|
| 2944 | }
|
| 2945 |
|
| 2946 |
|
| 2947 | /// Extensions to the C++ standard library.
|
| 2948 | namespace std
|
| 2949 | {
|
| 2950 | /// Numeric limits for half-precision floats.
|
| 2951 | /// Because of the underlying single-precision implementation of many operations, it inherits some properties from
|
| 2952 | /// `std::numeric_limits<float>`.
|
| 2953 | template<> class numeric_limits<half_float::half> : public numeric_limits<float>
|
| 2954 | {
|
| 2955 | public:
|
| 2956 | /// Supports signed values.
|
| 2957 | static HALF_CONSTEXPR_CONST bool is_signed = true;
|
| 2958 |
|
| 2959 | /// Is not exact.
|
| 2960 | static HALF_CONSTEXPR_CONST bool is_exact = false;
|
| 2961 |
|
| 2962 | /// Doesn't provide modulo arithmetic.
|
| 2963 | static HALF_CONSTEXPR_CONST bool is_modulo = false;
|
| 2964 |
|
| 2965 | /// IEEE conformant.
|
| 2966 | static HALF_CONSTEXPR_CONST bool is_iec559 = true;
|
| 2967 |
|
| 2968 | /// Supports infinity.
|
| 2969 | static HALF_CONSTEXPR_CONST bool has_infinity = true;
|
| 2970 |
|
| 2971 | /// Supports quiet NaNs.
|
| 2972 | static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
|
| 2973 |
|
| 2974 | /// Supports subnormal values.
|
| 2975 | static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
|
| 2976 |
|
| 2977 | /// Rounding mode.
|
| 2978 | /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying
|
| 2979 | /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding
|
| 2980 | /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the
|
| 2981 | /// single-precision rounding mode.
|
| 2982 | static HALF_CONSTEXPR_CONST float_round_style round_style = (std::numeric_limits<float>::round_style==
|
| 2983 | half_float::half::round_style) ? half_float::half::round_style : round_indeterminate;
|
| 2984 |
|
| 2985 | /// Significant digits.
|
| 2986 | static HALF_CONSTEXPR_CONST int digits = 11;
|
| 2987 |
|
| 2988 | /// Significant decimal digits.
|
| 2989 | static HALF_CONSTEXPR_CONST int digits10 = 3;
|
| 2990 |
|
| 2991 | /// Required decimal digits to represent all possible values.
|
| 2992 | static HALF_CONSTEXPR_CONST int max_digits10 = 5;
|
| 2993 |
|
| 2994 | /// Number base.
|
| 2995 | static HALF_CONSTEXPR_CONST int radix = 2;
|
| 2996 |
|
| 2997 | /// One more than smallest exponent.
|
| 2998 | static HALF_CONSTEXPR_CONST int min_exponent = -13;
|
| 2999 |
|
| 3000 | /// Smallest normalized representable power of 10.
|
| 3001 | static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
|
| 3002 |
|
| 3003 | /// One more than largest exponent
|
| 3004 | static HALF_CONSTEXPR_CONST int max_exponent = 16;
|
| 3005 |
|
| 3006 | /// Largest finitely representable power of 10.
|
| 3007 | static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
|
| 3008 |
|
| 3009 | /// Smallest positive normal value.
|
| 3010 | static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); }
|
| 3011 |
|
| 3012 | /// Smallest finite value.
|
| 3013 | static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); }
|
| 3014 |
|
| 3015 | /// Largest finite value.
|
| 3016 | static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); }
|
| 3017 |
|
| 3018 | /// Difference between one and next representable value.
|
| 3019 | static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); }
|
| 3020 |
|
| 3021 | /// Maximum rounding error.
|
| 3022 | static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
|
| 3023 | { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
|
| 3024 |
|
| 3025 | /// Positive infinity.
|
| 3026 | static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); }
|
| 3027 |
|
| 3028 | /// Quiet NaN.
|
| 3029 | static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); }
|
| 3030 |
|
| 3031 | /// Signalling NaN.
|
| 3032 | static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); }
|
| 3033 |
|
| 3034 | /// Smallest positive subnormal value.
|
| 3035 | static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); }
|
| 3036 | };
|
| 3037 |
|
#if HALF_ENABLE_CPP11_HASH
/// Hash function for half-precision floats.
/// This is only defined if C++11 `std::hash` is supported and enabled.
template<> struct hash<half_float::half> //: unary_function<half_float::half,size_t>
{
	/// Type of function argument.
	typedef half_float::half argument_type;

	/// Function return type.
	typedef size_t result_type;

	/// Compute hash function.
	/// The bit pattern 0x8000 (only the sign bit set, i.e. negative zero) is replaced by 0 before hashing, so that
	/// it yields the same hash value as the all-zero bit pattern. Every other bit pattern is hashed unchanged
	/// through `std::hash` of the underlying 16-bit integer type.
	/// \param arg half to hash
	/// \return hash value
	result_type operator()(argument_type arg) const
	{
		const half_float::detail::uint16 bits =
			static_cast<half_float::detail::uint16>((arg.data_==0x8000) ? 0 : arg.data_);
		return hash<half_float::detail::uint16>()(bits);
	}
};
#endif
|
| 3056 | }
|
| 3057 |
|
| 3058 |
|
| 3059 | #undef HALF_CONSTEXPR
|
| 3060 | #undef HALF_CONSTEXPR_CONST
|
| 3061 | #undef HALF_NOEXCEPT
|
| 3062 | #undef HALF_NOTHROW
|
| 3063 | #ifdef HALF_POP_WARNINGS
|
| 3064 | #pragma warning(pop)
|
| 3065 | #undef HALF_POP_WARNINGS
|
| 3066 | #endif
|
| 3067 |
|
| 3068 | #endif
|