// Copyright (c) 2022-2024, ARM Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef CT_CFLOAT_H
16#define CT_CFLOAT_H
17#include <algorithm>
18#include <cstdint>
19#include <cstring>
20#include <limits>
21#include <type_traits>
22#if defined(__cpp_lib_bit_cast)
23#include <bit>
24#endif // defined(__cpp_lib_bit_cast)
25
26namespace ct
27{
/// \brief Bitfield describing which optional features a floating-point
/// type provides. Each feature occupies its own bit so that features can
/// be combined with the bitwise operators.
enum class FloatFeatures
{
    None       = 0,         ///< No optional features
    HasNaN     = 1 << 0,    ///< The type can represent NaN values
    HasInf     = 1 << 1,    ///< The type can represent Infinity
    HasDenorms = 1 << 2,    ///< The type can represent denormal/subnormal values
};
37
38constexpr FloatFeatures operator&(const FloatFeatures& a, const FloatFeatures& b)
39{
40 using T = std::underlying_type_t<FloatFeatures>;
41 return static_cast<FloatFeatures>(static_cast<T>(a) & static_cast<T>(b));
42}
43
44constexpr FloatFeatures operator|(const FloatFeatures& a, const FloatFeatures& b)
45{
46 using T = std::underlying_type_t<FloatFeatures>;
47 return static_cast<FloatFeatures>(static_cast<T>(a) | static_cast<T>(b));
48}
49
50constexpr FloatFeatures& operator|=(FloatFeatures& a, const FloatFeatures& b)
51{
52 a = a | b;
53 return a;
54}
55
56namespace float_support
57{
/// \brief Empty tag type. It is taken as the first argument of the
/// `cfloat_advanced` constructor which builds a value from a raw bit
/// pattern.
struct hidden {};
60
/// \brief Get the number of bytes used to store a value of the given
/// number of bits.
///
/// NOTE The result is always a power of two (1, 2, 4, 8, ...), so it may
/// be larger than the minimum number of bytes able to hold `n_bits` bits.
constexpr size_t get_storage_bytes(const size_t n_bits)
{
    // Round the bit count up to a whole number of bytes.
    const size_t required_bytes = (n_bits + 7) / 8;

    // Keep doubling, starting from a single byte, until the requirement
    // is covered.
    size_t result = 1;
    while (result < required_bytes)
    {
        result *= 2;
    }
    return result;
}
75
76/// \brief Utility method to convert from an older representation of the
77/// floating-point features to the FloatFeatures bitfield.
78constexpr FloatFeatures get_float_flags(bool has_nan, bool has_denorm, bool has_inf)
79{
80 FloatFeatures r = FloatFeatures::None;
81
82 if (has_nan)
83 r |= FloatFeatures::HasNaN;
84
85 if (has_denorm)
86 r |= FloatFeatures::HasDenorms;
87
88 if (has_inf)
89 r |= FloatFeatures::HasInf;
90
91 return r;
92}
93
94/// \brief Shorthand for all support features
95static constexpr FloatFeatures AllFeats = get_float_flags(true, true, true);
96
// Map from a number of storage bytes to a suitable (signed) storage type.
// Only the sizes of the fixed-width integer types below are mapped; the
// primary template is left undefined for any other size.
template <size_t n_bytes>
struct storage_type;

template <>
struct storage_type<sizeof(int8_t)>
{
    using type = int8_t;
};

template <>
struct storage_type<sizeof(int16_t)>
{
    using type = int16_t;
};

template <>
struct storage_type<sizeof(int32_t)>
{
    using type = int32_t;
};

template <>
struct storage_type<sizeof(int64_t)>
{
    using type = int64_t;
};

// Convenience alias for the mapped storage type.
template <size_t n_storage_bytes>
using storage_type_t = typename storage_type<n_storage_bytes>::type;
115
// The helpers below exchange binary32 bit patterns through int32_t, which
// is only meaningful if both types have the same size.
static_assert(sizeof(float) == sizeof(int32_t), "float must be the same size as int32_t");

#if defined(__cpp_lib_bit_cast)
#define BITCAST_CONSTEXPR constexpr inline

// If bit_cast is available then use it

/// \brief Get the bit pattern of `f` as a 32-bit integer.
constexpr inline int32_t get_bits(const float& f)
{
    return std::bit_cast<int32_t>(f);
}

/// \brief Get the float whose bit pattern is `i`.
constexpr inline float from_bits(const int32_t& i)
{
    return std::bit_cast<float>(i);
}

#else
#define BITCAST_CONSTEXPR inline

// Otherwise `memcpy` is the safe (non-UB) way of achieving the same result

/// \brief Get the bit pattern of `f` as a 32-bit integer.
inline int32_t get_bits(const float& f)
{
    int32_t i;
    std::memcpy(&i, &f, sizeof(float));
    return i;
}

/// \brief Get the float whose bit pattern is `i`.
inline float from_bits(const int32_t& i)
{
    float f;
    std::memcpy(&f, &i, sizeof(float));
    return f;
}
#endif
149
150} // namespace float_support
151
/// \brief Overflow mode for narrowing floating-point casts.
///
/// Selects what a cast produces for values which cannot be represented by
/// the destination type.
enum class OverflowMode
{
    Saturate = 0,    ///< Map to the largest representable value
    Overflow = 1     ///< Map to infinity (if available) or NaN
};
161
162/// Functor for casting cfloat_advanced
163///
164/// Specific casting behavior can be specified when constructing the
165/// functor.
166///
167/// By default, OVERFLOW mode is used when the destination type has either
168/// infinity or NaN representations. Otherwise SATURATE mode is used. It is
169/// illegal to specify OVERFLOW mode for a type which has neither infinity
170/// or NaN representations - this will result in a compilation error.
171template <class in_type,
172 class out_type,
173 OverflowMode overflow_mode =
174 (out_type::has_nan || out_type::has_inf) ? OverflowMode::Overflow : OverflowMode::Saturate>
175class cfloat_cast
176{
177 constexpr static FloatFeatures in_feats = in_type::features;
178 constexpr static FloatFeatures out_feats = out_type::features;
179 constexpr static size_t in_bits = in_type::n_bits;
180 constexpr static size_t in_exp_bits = in_type::n_exponent_bits;
181 constexpr static size_t out_bits = out_type::n_bits;
182 constexpr static size_t out_exp_bits = out_type::n_exponent_bits;
183
184public:
185 constexpr cfloat_cast()
186 {
187 // SATURATE mode MUST be specified if the destination type does not
188 // have either NaN or infinity representations.
189 static_assert(overflow_mode == OverflowMode::Saturate || out_type::has_nan || out_type::has_inf);
190 }
191
192 /// \brief Cast from `in` to the given `out_type`
193 //
194 // This code relies on an understanding of the storage format used by
195 // `cfloat_advanced`. See the documentation of that class for further
196 // details.
197 constexpr out_type operator()(const in_type& in) const
198 {
199 // Shortcut for types which differ only in the number of significand
200 // bits, and where the output type is wider than the input type. For
201 // example, bfloat16 and binary32.
202 if constexpr (in_exp_bits == out_exp_bits && out_bits >= in_bits && in_feats == out_feats)
203 {
204 return out_type::from_bits(static_cast<typename out_type::storage_t>(in.bits()) << (out_bits - in_bits));
205 }
206
207 // Get initial values for the new floating point type
208 const bool sign_bit = in.sign();
209 int64_t new_exponent_bits = 0;
210 uint64_t new_significand = 0;
211
212 if (in.is_nan() || in.is_infinity())
213 {
214 new_exponent_bits = (UINT64_C(1) << out_exp_bits) - 1;
215
216 if (in.is_nan())
217 {
218 if constexpr (out_type::has_inf)
219 {
220 // Copy across the `not_quiet bit`; set the LSB.
221 // Don't attempt to copy across any of the rest of
222 // the payload.
223 new_significand = 0x1 | (((in.significand() >> (in_type::n_significand_bits - 1)) & 1)
224 << out_type::n_significand_bits);
225 }
226 else
227 {
228 new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
229 }
230 }
231 else if constexpr (out_type::has_inf && overflow_mode == OverflowMode::Saturate)
232 {
233 new_exponent_bits -= 1;
234 new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
235 }
236 else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Saturate)
237 {
238 new_significand = (UINT64_C(1) << out_type::n_significand_bits) - (out_type::has_nan ? 2 : 1);
239 }
240 else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Overflow)
241 {
242 new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
243 }
244 }
245 else if (!in.is_zero())
246 {
247 const int64_t this_exponent_bits = in.exponent_bits();
248 {
249 constexpr int64_t exponent_rebias = out_type::exponent_bias - in_type::exponent_bias;
250 new_exponent_bits = std::max(this_exponent_bits + exponent_rebias, exponent_rebias + 1);
251 }
252 new_significand = in.significand() << (64 - in_type::n_significand_bits);
253
254 // Normalise subnormals
255 if (this_exponent_bits == 0)
256 {
257 // Shift the most-significant 1 out of the magnitude to
258 // convert it to a significand. Modify the exponent
259 // accordingly.
260 uint8_t shift = __builtin_clzl(new_significand) + 1;
261 new_exponent_bits -= shift;
262 new_significand <<= shift;
263 }
264
265 // Apply overflow to out-of-range values; this must occur before
266 // rounding, as out-of-range values could be rounded down to the
267 // largest representable value.
268 if constexpr (overflow_mode == OverflowMode::Overflow)
269 {
270 // Determine the maximum value of exponent, and unrounded
271 // significand.
272 constexpr bool inf_and_nan = out_type::has_nan && out_type::has_inf;
273 constexpr int64_t max_exp_bits = (INT64_C(1) << out_exp_bits) - (inf_and_nan ? 2 : 1);
274 constexpr uint64_t max_significand =
275 ((UINT64_C(1) << out_type::n_significand_bits) - (inf_and_nan ? 1 : 2))
276 << (64 - out_type::n_significand_bits);
277
278 // If the exponent is strictly larger than the largest
279 // possible, or the exponent is equal to the largest
280 // possible AND the (unrounded) significand is strictly
281 // larger than the largest possible then return an
282 // appropriate overflow value.
283 if (new_exponent_bits > max_exp_bits ||
284 (new_exponent_bits == max_exp_bits && new_significand > max_significand))
285 {
286 if constexpr (out_type::has_inf)
287 return out_type::infinity(sign_bit);
288 else
289 return out_type::NaN();
290 }
291 }
292
293 // Align the significand for the output type
294 uint32_t shift = 64 - out_type::n_significand_bits;
295 const bool other_is_subnormal = new_exponent_bits <= 0;
296 if (other_is_subnormal)
297 {
298 shift += 1 - new_exponent_bits;
299 new_exponent_bits = 0;
300 }
301
302 const uint64_t shift_out = shift == 64 ? new_significand : new_significand & ((UINT64_C(1) << shift) - 1);
303 new_significand = shift == 64 ? 0 : new_significand >> shift;
304
305 // Reinsert the most-significant-one if this is a subnormal
306 // in the output type.
307 new_significand |= (other_is_subnormal ? UINT64_C(1) : 0) << (64 - shift);
308
309 // Apply rounding based on the bits shifted out of the
310 // significand
311 const uint64_t shift_half = UINT64_C(1) << (shift - 1);
312 if (shift_out > shift_half || (shift_out == shift_half && (new_significand & 1)))
313 {
314 new_significand += 1;
315
316 // Handle the case that the significand overflowed due
317 // to rounding
318 constexpr uint64_t max_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
319 if (new_significand > max_significand)
320 {
321 new_significand = 0;
322 new_exponent_bits++;
323 }
324 }
325
326 // Saturate or overflow if the value is larger than can be
327 // represented in the output type. This can only occur if the
328 // size of the exponent of the new type is not greater than the
329 // exponent of the old type.
330 if constexpr (out_exp_bits <= in_exp_bits)
331 {
332 constexpr int64_t inf_exp_bits = (INT64_C(1) << out_exp_bits) - 1;
333 if (new_exponent_bits >= inf_exp_bits)
334 {
335 if constexpr (out_type::has_inf && overflow_mode == OverflowMode::Overflow)
336 {
337 // If the output type has a representation of
338 // infinity, and we are in OVERFLOW Mode, then
339 // return infinity.
340 new_exponent_bits = inf_exp_bits;
341 new_significand = 0;
342 }
343 else if constexpr (out_type::has_inf)
344 {
345 // If the output type has a representation of
346 // infinity, and we are in SATURATE mode, then
347 // return the largest representable real number.
348 new_exponent_bits = inf_exp_bits - 1;
349 new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
350 }
351 else if (new_exponent_bits > inf_exp_bits)
352 {
353 if constexpr (overflow_mode == OverflowMode::Overflow)
354 return out_type::NaN();
355 else
356 return out_type::max(sign_bit);
357 }
358 else
359 {
360 constexpr uint64_t max_significand =
361 (UINT64_C(1) << out_type::n_significand_bits) - (out_type::has_nan ? 2 : 1);
362 if (new_significand > max_significand)
363 {
364 if constexpr (overflow_mode == OverflowMode::Saturate)
365 new_significand = max_significand;
366 else
367 return out_type::NaN();
368 }
369 }
370 }
371 }
372 }
373
374 return out_type::from_bits(sign_bit, new_exponent_bits, new_significand);
375 }
376};
377
378/// \brief Bit-accurate representation storage of IEEE754 compliant and
379/// derived floating point types.
380///
381/// Template parameters allow for specification of the number of bits, the
382/// number of exponent bits, and the features of the floating point types.
383/// The number of significand bits is `n_bits - n_exponent_bits - 1`. It is
384/// not possible to represent a signless type, such as FP8 E8M0.
385///
386/// For an imaginary 7-bit type, FP7 E4M2; the storage for various values
387/// given different floating point features is given below:
388///
389/// Value All features No infinity No features
390/// -------------------------- ------------ ----------- -----------
391/// Positive zero +0 00 0000 00 As before As before
392/// Negative zero -0 11 0000 00 As before As before
393/// Positive/negative infinity SS 1111 00 N/A N/A
394/// Signalling NaN SS 1111 01 SS 1111 11 N/A
395/// Quiet NaN SS 1111 11 N/A N/A
396/// Largest normal SS 1110 11 SS 1111 10 SS 1111 11
397/// Smallest normal SS 0001 00 As before SS 0000 01
398/// Largest denormal SS 0000 11 SS 0000 11 N/A
399///
400/// Note that the sign bit is extended to fill the storage type.
401template <size_t _n_bits, size_t n_exp_bits, FloatFeatures Feats = float_support::AllFeats>
402class cfloat_advanced
403{
404public:
405 using storage_t = float_support::storage_type_t<float_support::get_storage_bytes(_n_bits)>;
406
407 static constexpr size_t n_bits = _n_bits;
408 static constexpr size_t n_exponent_bits = n_exp_bits;
409 static constexpr size_t n_significand_bits = n_bits - (1 + n_exp_bits);
410 static constexpr int64_t exponent_bias = (INT64_C(1) << (n_exp_bits - 1)) - 1;
411
412 static constexpr FloatFeatures features = Feats;
413 static constexpr bool has_nan = (Feats & FloatFeatures::HasNaN) != FloatFeatures::None;
414 static constexpr bool has_inf = (Feats & FloatFeatures::HasInf) != FloatFeatures::None;
415 static constexpr bool has_denorms = (Feats & FloatFeatures::HasDenorms) != FloatFeatures::None;
416
417 /// \brief Construct a floating point type with the given bit
418 /// representation.
419 static constexpr cfloat_advanced from_bits(storage_t bits)
420 {
421 return cfloat_advanced(float_support::hidden(), bits);
422 }
423
424 /// \brief Construct a float from the given sign, exponent and
425 /// significand bits.
426 static constexpr cfloat_advanced from_bits(bool pm, storage_t e, storage_t s)
427 {
428 storage_t bits = pm ? -1 : 0;
429
430 bits <<= n_exp_bits;
431 bits |= e;
432
433 bits <<= n_significand_bits;
434 if (has_denorms || e)
435 bits |= s;
436
437 return cfloat_advanced(float_support::hidden(), bits);
438 }
439
440 /// \brief (Hidden) Construct a float type from a given bit pattern
441 constexpr cfloat_advanced(const float_support::hidden&, storage_t bits)
442 : m_data(bits)
443 {}
444
445 constexpr cfloat_advanced()
446 : m_data(0)
447 {}
448 constexpr cfloat_advanced(const cfloat_advanced& other)
449 : m_data(other.m_data)
450 {}
451
452 constexpr cfloat_advanced& operator=(const cfloat_advanced& other)
453 {
454 this->m_data = other.m_data;
455 return *this;
456 }
457
458 constexpr cfloat_advanced& operator=(cfloat_advanced&& other)
459 {
460 this->m_data = other.m_data;
461 return *this;
462 }
463
464 /// \brief Get a NaN representation
465 static constexpr cfloat_advanced NaN()
466 {
467 static_assert(has_nan);
468
469 // NaN is always encoded with all 1s in the exponent.
470 // If Inf exists, then NaN is encoded as a non-zero significand; if
471 // Inf doesn't exist then NaN is encoded as all ones in the
472 // significand.
473 constexpr uint64_t exp_bits = (UINT64_C(1) << n_exponent_bits) - 1;
474 constexpr uint64_t sig_bits = has_inf ? 1 : (UINT64_C(1) << n_significand_bits) - 1;
475 return cfloat_advanced::from_bits(false, exp_bits, sig_bits);
476 }
477
478 /// \brief Get a representation of infinity
479 static constexpr cfloat_advanced infinity(const bool& sign)
480 {
481 static_assert(has_inf);
482
483 // Inf is always encoded with all 1s in the exponent, and all zeros
484 // in the significand.
485 return cfloat_advanced::from_bits(sign, (UINT64_C(1) << n_exponent_bits) - 1, 0);
486 }
487
488 /// \brief Get the largest representable value
489 static constexpr cfloat_advanced max(const bool& sign)
490 {
491 if constexpr (has_nan && has_inf)
492 {
493 // Where we have NaN and Infinity, exponents all `1` corresponds
494 // to some of these values.
495 return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 2, (UINT64_C(1) << n_significand_bits) - 1);
496 }
497 else if constexpr (has_nan || has_inf)
498 {
499 // Where we have either NaN or infinity (but not both),
500 // exponents all `1` AND significand all `1` corresponds to the
501 // special value.
502 return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 2);
503 }
504 else
505 {
506 // With no special values to encode, the maximum value is
507 // encoded as all `1`s.
508 return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 1);
509 }
510 }
511
512 /// \brief Cast to a different floating point representation.
513 template <size_t out_n_bits, size_t out_n_exp_bits, FloatFeatures OutFeats>
514 constexpr inline operator cfloat_advanced<out_n_bits, out_n_exp_bits, OutFeats>() const
515 {
516 using out_type = cfloat_advanced<out_n_bits, out_n_exp_bits, OutFeats>;
517 return cfloat_cast<cfloat_advanced, out_type>().operator()(*this);
518 }
519
520 /// \brief Convert from a 32-bit floating point value
521 BITCAST_CONSTEXPR
522 cfloat_advanced(const float& f)
523 {
524 // If this format exactly represents the binary32 format then get
525 // the bits from the provided float; otherwise get a binary32
526 // representation and then convert to this format.
527 if constexpr (represents_binary32())
528 m_data = float_support::get_bits(f);
529 else
530 m_data =
531 static_cast<cfloat_advanced<n_bits, n_exp_bits, Feats>>(static_cast<cfloat_advanced<32, 8>>(f)).m_data;
532 }
533
534 /// \brief Cast to a 32-bit floating point value
535 BITCAST_CONSTEXPR operator float() const
536 {
537 // If this format exactly represents the binary32 format then return
538 // a float; otherwise get a binary32 representation and then return
539 // a float.
540 if constexpr (represents_binary32())
541 return float_support::from_bits(m_data);
542 else
543 return static_cast<float>(this->operator cfloat_advanced<32, 8>());
544 }
545
546 /// \brief Return whether this type represents the IEEE754 binary32
547 /// format
548 constexpr static inline bool represents_binary32()
549 {
550 return std::is_same_v<storage_t, int32_t> && n_exp_bits == 8 && Feats == float_support::AllFeats;
551 }
552
553 constexpr auto operator-() const
554 {
555 constexpr storage_t sign_bits =
556 static_cast<storage_t>(std::numeric_limits<std::make_unsigned_t<storage_t>>::max() << (n_bits - 1));
557 return from_bits(m_data ^ sign_bits);
558 }
559
560 constexpr bool is_subnormal() const
561 {
562 return exponent_bits() == 0 && significand() != 0;
563 }
564
565 constexpr bool is_zero() const
566 {
567 return exponent_bits() == 0 && significand() == 0;
568 }
569
570 constexpr bool is_nan() const
571 {
572 return has_nan && (exponent_bits() == (UINT64_C(1) << n_exponent_bits) - 1) &&
573 ((has_inf && significand()) || (!has_inf && significand() == (UINT64_C(1) << n_significand_bits) - 1));
574 }
575
576 constexpr bool is_infinity() const
577 {
578 return has_inf && ((exponent_bits() == (UINT64_C(1) << n_exponent_bits) - 1) && (significand() == 0));
579 }
580
581 constexpr inline const storage_t& bits() const
582 {
583 return m_data;
584 }
585
586 /// \brief Get the exponent
587 constexpr inline int64_t exponent() const
588 {
589 return std::max<int64_t>(exponent_bits(), INT64_C(1)) - exponent_bias;
590 }
591
592 /// \brief Get the sign bit
593 constexpr inline bool sign() const
594 {
595 return (m_data >> (n_bits - 1)) & 0x1;
596 }
597
598 /// \brief Get the bits from the exponent
599 constexpr inline uint64_t exponent_bits() const
600 {
601 constexpr uint64_t mask = (UINT64_C(1) << n_exp_bits) - 1;
602 return (m_data >> n_significand_bits) & mask;
603 }
604
605 constexpr inline uint64_t significand() const
606 {
607 return m_data & ((UINT64_C(1) << n_significand_bits) - 1);
608 }
609
610 constexpr inline bool operator==(const cfloat_advanced& other) const
611 {
612 return !is_nan() && !other.is_nan() && // Neither operand is NaN
613 ((is_zero() && other.is_zero()) || (m_data == other.m_data));
614 }
615
616 constexpr inline bool operator!=(const cfloat_advanced& other) const
617 {
618 return !(*this == other);
619 }
620
621 constexpr inline cfloat_advanced& operator+=(const cfloat_advanced& rhs)
622 {
623 this->m_data = static_cast<cfloat_advanced>(static_cast<float>(*this) + static_cast<float>(rhs)).bits();
624 return *this;
625 }
626
627private:
628 storage_t m_data = 0;
629};
630
631// This should probably be exported so we can use it elsewhere
632#undef BITCAST_CONSTEXPR
633
634/// \brief Wrapper to maintain API compatibility with older code, which was
635/// limited to power-of-two sizes of floats.
636template <typename storage_t,
637 size_t n_exp_bits,
638 bool has_nan,
639 bool with_denorm,
640 bool with_infinity,
641 std::enable_if_t<(n_exp_bits + 1 < sizeof(storage_t) * 8), bool> = true>
642using cfloat = cfloat_advanced<sizeof(storage_t) * 8,
643 n_exp_bits,
644 float_support::get_float_flags(has_nan, with_denorm, with_infinity)>;
645
646namespace float_support
647{
// Pre-C++23 these can't be computed as constexpr, so have to hardcode
// them. Each table is indexed by the binary quantity it is derived from;
// only the listed values are available, any other index is a compile
// error.

template <int>
struct digits10;    // floor(log10(2) * (digits - 1))
template <int>
struct max_digits10;    // ceil(log10(2) * digits + 1)
template <int>
struct min_exponent10;    // ceil(log10(2) * (min_exponent - 1))
template <int>
struct max_exponent10;    // floor(log10(2) * max_exponent)

// 3 binary digits (2 significand bits)
template <>
struct digits10<3>
{
    constexpr static inline int value = 0;
};

template <>
struct max_digits10<3>
{
    constexpr static inline int value = 2;
};

// 4 binary digits (3 significand bits)
template <>
struct digits10<4>
{
    constexpr static inline int value = 0;
};

template <>
struct max_digits10<4>
{
    constexpr static inline int value = 3;
};

// 8 binary digits (7 significand bits)
template <>
struct digits10<8>
{
    constexpr static inline int value = 2;
};

template <>
struct max_digits10<8>
{
    constexpr static inline int value = 4;
};

// 10 binary digits (9 significand bits)
template <>
struct digits10<10>
{
    constexpr static inline int value = 2;
};

template <>
struct max_digits10<10>
{
    constexpr static inline int value = 5;
};

// 11 binary digits (10 significand bits, e.g. binary16)
template <>
struct digits10<11>
{
    constexpr static inline int value = 3;
};

template <>
struct max_digits10<11>
{
    constexpr static inline int value = 5;
};

// 24 binary digits (23 significand bits, e.g. binary32)
template <>
struct digits10<24>
{
    constexpr static inline int value = 6;
};

template <>
struct max_digits10<24>
{
    constexpr static inline int value = 9;
};

// Exponent bias of 7
template <>
struct min_exponent10<-5>
{
    constexpr static inline int value = -1;
};

template <>
struct max_exponent10<8>
{
    constexpr static inline int value = 2;
};

// Exponent bias of 15. The smallest normal value is 2^-14 (about 6.1e-5),
// so 10^-4 is the smallest power of ten which is still a normal value.
template <>
struct min_exponent10<-13>
{
    constexpr static inline int value = -4;
};

template <>
struct max_exponent10<16>
{
    constexpr static inline int value = 4;
};

// Exponent bias of 127
template <>
struct min_exponent10<-125>
{
    constexpr static inline int value = -37;
};

template <>
struct max_exponent10<128>
{
    constexpr static inline int value = 38;
};

template <int d>
inline constexpr int digits10_v = digits10<d>::value;
template <int d>
inline constexpr int max_digits10_v = max_digits10<d>::value;

template <int e>
inline constexpr int min_exponent10_v = min_exponent10<e>::value;

template <int e>
inline constexpr int max_exponent10_v = max_exponent10<e>::value;
730
731} // namespace float_support
732
733} // namespace ct
734
735namespace std
736{
737
738template <size_t n_bits, size_t n_exp_bits, ct::FloatFeatures Feats>
739struct is_floating_point<ct::cfloat_advanced<n_bits, n_exp_bits, Feats>> : std::integral_constant<bool, true>
740{};
741
742template <size_t n_bits, size_t n_exp_bits, ct::FloatFeatures Feats>
743class numeric_limits<ct::cfloat_advanced<n_bits, n_exp_bits, Feats>>
744{
745 using this_cfloat = ct::cfloat_advanced<n_bits, n_exp_bits, Feats>;
746
747public:
748 static constexpr bool is_specialized = true;
749
750 static constexpr auto min() noexcept
751 {
752 return this_cfloat::from_bits(false, 1, 0);
753 }
754
755 static constexpr auto max() noexcept
756 {
757 return this_cfloat::max(false);
758 }
759 static constexpr auto lowest() noexcept
760 {
761 return -max();
762 }
763
764 static constexpr int digits = this_cfloat::n_significand_bits + 1;
765 static constexpr int digits10 = ct::float_support::digits10_v<digits>;
766 static constexpr int max_digits10 = ct::float_support::max_digits10_v<digits>;
767
768 static constexpr bool is_signed = true;
769 static constexpr bool is_integer = false;
770 static constexpr bool is_exact = false;
771 static constexpr int radix = 2;
772
773 static constexpr auto epsilon() noexcept
774 {
775 return this_cfloat::from_bits(false, this_cfloat::exponent_bias - this_cfloat::n_significand_bits, 0);
776 }
777
778 static constexpr auto round_error() noexcept
779 {
780 return this_cfloat::from_bits(0, this_cfloat::exponent_bias - 1, 0);
781 }
782
783 static constexpr int min_exponent = (1 - this_cfloat::exponent_bias) + 1;
784 static constexpr int min_exponent10 = ct::float_support::min_exponent10_v<min_exponent>;
785 static constexpr int max_exponent = this_cfloat::exponent_bias + 1;
786 static constexpr int max_exponent10 = ct::float_support::max_exponent10_v<max_exponent>;
787
788 static constexpr bool has_infinity = this_cfloat::has_inf;
789 static constexpr bool has_quiet_NaN = this_cfloat::has_nan && this_cfloat::has_inf;
790 static constexpr bool has_signaling_NaN = this_cfloat::has_nan;
791 static constexpr float_denorm_style has_denorm = this_cfloat::has_denorms ? denorm_present : denorm_absent;
792 static constexpr bool has_denorm_loss = false;
793
794 static constexpr auto infinity() noexcept
795 {
796 if constexpr (this_cfloat::has_inf)
797 {
798 return this_cfloat::infinity(false);
799 }
800 else
801 {
802 return this_cfloat::from_bits(false, 0, 0);
803 }
804 }
805
806 static constexpr auto quiet_NaN() noexcept
807 {
808 const uint64_t exp_bits = (UINT64_C(1) << this_cfloat::n_exponent_bits) - 1;
809 const uint64_t sig_bits = this_cfloat::has_inf ? (UINT64_C(1) << (this_cfloat::n_significand_bits - 1)) | 1
810 : (UINT64_C(1) << this_cfloat::n_significand_bits) - 1;
811 return this_cfloat::from_bits(false, exp_bits, sig_bits);
812 }
813
814 static constexpr auto signaling_NaN() noexcept
815 {
816 const uint64_t exp_bits = (UINT64_C(1) << this_cfloat::n_exponent_bits) - 1;
817 const uint64_t sig_bits = this_cfloat::has_inf ? 1 : (UINT64_C(1) << this_cfloat::n_significand_bits) - 1;
818 return this_cfloat::from_bits(false, exp_bits, sig_bits);
819 }
820
821 static constexpr auto denorm_min() noexcept
822 {
823 return this_cfloat::from_bits(false, 0, 1);
824 }
825
826 static constexpr bool is_iec559 = false;
827 static constexpr bool is_bounded = false;
828 static constexpr bool is_modulo = false;
829
830 static constexpr bool traps = false;
831 static constexpr bool tinyness_before = false;
832 static constexpr float_round_style round_style = round_to_nearest;
833};
834
835} // namespace std
836
837#endif // CT_CFLOAT_H