Blame - include/cfloat.h - tosa/serialization_lib

blob: 0cf4896c1ef710bfe0748ae5e7f424b58b647fcc [file] [log] [blame]

Won Jeon	520b7ca	2024-04-19 14:21:00 +0000	[diff] [blame^]	1	// Copyright (c) 2022-2024, ARM Limited.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#ifndef CT_CFLOAT_H
				16	#define CT_CFLOAT_H
				17	#include <algorithm>
				18	#include <cstdint>
				19	#include <cstring>
				20	#include <limits>
				21	#include <type_traits>
				22	#if defined(__cpp_lib_bit_cast)
				23	#include <bit>
				24	#endif // defined(__cpp_lib_bit_cast)
				25
				26	namespace ct
				27	{
				/// \brief Bit flags naming the optional value classes that a floating
				/// point format is able to encode.
				enum class FloatFeatures
				{
				    None       = 0,         ///< No optional value classes
				    HasNaN     = 1 << 0,    ///< NaN values are representable
				    HasInf     = 1 << 1,    ///< Infinity is representable
				    HasDenorms = 1 << 2,    ///< Denormal/subnormal values are representable
				};
				37
				38	constexpr FloatFeatures operator&(const FloatFeatures& a, const FloatFeatures& b)
				39	{
				40	using T = std::underlying_type_t<FloatFeatures>;
				41	return static_cast<FloatFeatures>(static_cast<T>(a) & static_cast<T>(b));
				42	}
				43
				44	constexpr FloatFeatures operator\|(const FloatFeatures& a, const FloatFeatures& b)
				45	{
				46	using T = std::underlying_type_t<FloatFeatures>;
				47	return static_cast<FloatFeatures>(static_cast<T>(a) \| static_cast<T>(b));
				48	}
				49
				50	constexpr FloatFeatures& operator\|=(FloatFeatures& a, const FloatFeatures& b)
				51	{
				52	a = a \| b;
				53	return a;
				54	}
				55
				56	namespace float_support
				57	{
				/// Empty tag type; `cfloat_advanced` takes it as the first argument of its
				/// raw-bit-pattern constructor.
				struct hidden
				{
				};
				60
				/// \brief Size, in bytes, of the storage used to hold `n_bits` bits.
				///
				/// NOTE The result is not simply the byte count rounded up: it is the
				/// smallest power of two that is at least ceil(n_bits / 8), and it is
				/// never smaller than one.
				constexpr size_t get_storage_bytes(const size_t n_bits)
				{
				    const size_t required = (n_bits + 7) / 8;
				    size_t result = 1;
				    while (result < required)
				    {
				        result *= 2;
				    }
				    return result;
				}
				75
				76	/// \brief Utility method to convert from an older representation of the
				77	/// floating-point features to the FloatFeatures bitfield.
				78	constexpr FloatFeatures get_float_flags(bool has_nan, bool has_denorm, bool has_inf)
				79	{
				80	FloatFeatures r = FloatFeatures::None;
				81
				82	if (has_nan)
				83	r \|= FloatFeatures::HasNaN;
				84
				85	if (has_denorm)
				86	r \|= FloatFeatures::HasDenorms;
				87
				88	if (has_inf)
				89	r \|= FloatFeatures::HasInf;
				90
				91	return r;
				92	}
				93
				94	/// \brief Shorthand for all support features
				95	static constexpr FloatFeatures AllFeats = get_float_flags(true, true, true);
				96
				// Map from a number of storage bytes to the signed integer type of that
				// size. Only the sizes of int8_t, int16_t, int32_t and int64_t are
				// specialised; any other size leaves `storage_type` incomplete.
				template <size_t n_bytes>
				struct storage_type;

				template <>
				struct storage_type<sizeof(int8_t)>
				{
				    using type = int8_t;
				};

				template <>
				struct storage_type<sizeof(int16_t)>
				{
				    using type = int16_t;
				};

				template <>
				struct storage_type<sizeof(int32_t)>
				{
				    using type = int32_t;
				};

				template <>
				struct storage_type<sizeof(int64_t)>
				{
				    using type = int64_t;
				};

				/// Convenience alias for `storage_type<n_storage_bytes>::type`.
				template <size_t n_storage_bytes>
				using storage_type_t = typename storage_type<n_storage_bytes>::type;
				115
				// Both helpers below reinterpret a `float` as an `int32_t` (and back);
				// that is only meaningful when the two types have the same size.
				static_assert(sizeof(float) == sizeof(int32_t), "float must be 32 bits wide to exchange bits with int32_t");

				#if defined(__cpp_lib_bit_cast)
				#define BITCAST_CONSTEXPR constexpr inline

				// If bit_cast is available then use it; this keeps the conversions
				// usable in constant expressions.

				/// \brief Return the bit pattern of `f` as a 32-bit integer.
				constexpr inline int32_t get_bits(const float& f)
				{
				    return std::bit_cast<int32_t>(f);
				}

				/// \brief Return the float whose bit pattern is `i`.
				constexpr inline float from_bits(const int32_t& i)
				{
				    return std::bit_cast<float>(i);
				}

				#else
				#define BITCAST_CONSTEXPR inline

				// Otherwise `memcpy` is the safe (non-UB) way of achieving the same
				// result, at the cost of not being usable in constant expressions.

				/// \brief Return the bit pattern of `f` as a 32-bit integer.
				inline int32_t get_bits(const float& f)
				{
				    int32_t i = 0;
				    std::memcpy(&i, &f, sizeof(i));
				    return i;
				}

				/// \brief Return the float whose bit pattern is `i`.
				inline float from_bits(const int32_t& i)
				{
				    float f = 0.0f;
				    std::memcpy(&f, &i, sizeof(f));
				    return f;
				}
				#endif
				149
				150	} // namespace float_support
				151
				/// \brief Policy applied by narrowing floating-point casts to values that
				/// the destination type cannot represent.
				enum class OverflowMode
				{
				    Saturate = 0,    ///< Map to the largest representable value
				    Overflow = 1     ///< Map to infinity (if available) or NaN
				};
				161
				162	/// Functor for casting cfloat_advanced
				163	///
				164	/// Specific casting behavior can be specified when constructing the
				165	/// functor.
				166	///
				167	/// By default, OVERFLOW mode is used when the destination type has either
				168	/// infinity or NaN representations. Otherwise SATURATE mode is used. It is
				169	/// illegal to specify OVERFLOW mode for a type which has neither infinity
				170	/// or NaN representations - this will result in a compilation error.
				171	template <class in_type,
				172	class out_type,
				173	OverflowMode overflow_mode =
				174	(out_type::has_nan \|\| out_type::has_inf) ? OverflowMode::Overflow : OverflowMode::Saturate>
				175	class cfloat_cast
				176	{
				177	constexpr static FloatFeatures in_feats = in_type::features;
				178	constexpr static FloatFeatures out_feats = out_type::features;
				179	constexpr static size_t in_bits = in_type::n_bits;
				180	constexpr static size_t in_exp_bits = in_type::n_exponent_bits;
				181	constexpr static size_t out_bits = out_type::n_bits;
				182	constexpr static size_t out_exp_bits = out_type::n_exponent_bits;
				183
				184	public:
				185	constexpr cfloat_cast()
				186	{
				187	// SATURATE mode MUST be specified if the destination type does not
				188	// have either NaN or infinity representations.
				189	static_assert(overflow_mode == OverflowMode::Saturate \|\| out_type::has_nan \|\| out_type::has_inf);
				190	}
				191
				192	/// \brief Cast from `in` to the given `out_type`
				193	//
				194	// This code relies on an understanding of the storage format used by
				195	// `cfloat_advanced`. See the documentation of that class for further
				196	// details.
				197	constexpr out_type operator()(const in_type& in) const
				198	{
				199	// Shortcut for types which differ only in the number of significand
				200	// bits, and where the output type is wider than the input type. For
				201	// example, bfloat16 and binary32.
				202	if constexpr (in_exp_bits == out_exp_bits && out_bits >= in_bits && in_feats == out_feats)
				203	{
				204	return out_type::from_bits(static_cast<typename out_type::storage_t>(in.bits()) << (out_bits - in_bits));
				205	}
				206
				207	// Get initial values for the new floating point type
				208	const bool sign_bit = in.sign();
				209	int64_t new_exponent_bits = 0;
				210	uint64_t new_significand = 0;
				211
				212	if (in.is_nan() \|\| in.is_infinity())
				213	{
				214	new_exponent_bits = (UINT64_C(1) << out_exp_bits) - 1;
				215
				216	if (in.is_nan())
				217	{
				218	if constexpr (out_type::has_inf)
				219	{
				220	// Copy across the `not_quiet bit`; set the LSB.
				221	// Don't attempt to copy across any of the rest of
				222	// the payload.
				223	new_significand = 0x1 \| (((in.significand() >> (in_type::n_significand_bits - 1)) & 1)
				224	<< out_type::n_significand_bits);
				225	}
				226	else
				227	{
				228	new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
				229	}
				230	}
				231	else if constexpr (out_type::has_inf && overflow_mode == OverflowMode::Saturate)
				232	{
				233	new_exponent_bits -= 1;
				234	new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
				235	}
				236	else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Saturate)
				237	{
				238	new_significand = (UINT64_C(1) << out_type::n_significand_bits) - (out_type::has_nan ? 2 : 1);
				239	}
				240	else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Overflow)
				241	{
				242	new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
				243	}
				244	}
				245	else if (!in.is_zero())
				246	{
				247	const int64_t this_exponent_bits = in.exponent_bits();
				248	{
				249	constexpr int64_t exponent_rebias = out_type::exponent_bias - in_type::exponent_bias;
				250	new_exponent_bits = std::max(this_exponent_bits + exponent_rebias, exponent_rebias + 1);
				251	}
				252	new_significand = in.significand() << (64 - in_type::n_significand_bits);
				253
				254	// Normalise subnormals
				255	if (this_exponent_bits == 0)
				256	{
				257	// Shift the most-significant 1 out of the magnitude to
				258	// convert it to a significand. Modify the exponent
				259	// accordingly.
				260	uint8_t shift = __builtin_clzl(new_significand) + 1;
				261	new_exponent_bits -= shift;
				262	new_significand <<= shift;
				263	}
				264
				265	// Apply overflow to out-of-range values; this must occur before
				266	// rounding, as out-of-range values could be rounded down to the
				267	// largest representable value.
				268	if constexpr (overflow_mode == OverflowMode::Overflow)
				269	{
				270	// Determine the maximum value of exponent, and unrounded
				271	// significand.
				272	constexpr bool inf_and_nan = out_type::has_nan && out_type::has_inf;
				273	constexpr int64_t max_exp_bits = (INT64_C(1) << out_exp_bits) - (inf_and_nan ? 2 : 1);
				274	constexpr uint64_t max_significand =
				275	((UINT64_C(1) << out_type::n_significand_bits) - (inf_and_nan ? 1 : 2))
				276	<< (64 - out_type::n_significand_bits);
				277
				278	// If the exponent is strictly larger than the largest
				279	// possible, or the exponent is equal to the largest
				280	// possible AND the (unrounded) significand is strictly
				281	// larger than the largest possible then return an
				282	// appropriate overflow value.
				283	if (new_exponent_bits > max_exp_bits \|\|
				284	(new_exponent_bits == max_exp_bits && new_significand > max_significand))
				285	{
				286	if constexpr (out_type::has_inf)
				287	return out_type::infinity(sign_bit);
				288	else
				289	return out_type::NaN();
				290	}
				291	}
				292
				293	// Align the significand for the output type
				294	uint32_t shift = 64 - out_type::n_significand_bits;
				295	const bool other_is_subnormal = new_exponent_bits <= 0;
				296	if (other_is_subnormal)
				297	{
				298	shift += 1 - new_exponent_bits;
				299	new_exponent_bits = 0;
				300	}
				301
				302	const uint64_t shift_out = shift == 64 ? new_significand : new_significand & ((UINT64_C(1) << shift) - 1);
				303	new_significand = shift == 64 ? 0 : new_significand >> shift;
				304
				305	// Reinsert the most-significant-one if this is a subnormal
				306	// in the output type.
				307	new_significand \|= (other_is_subnormal ? UINT64_C(1) : 0) << (64 - shift);
				308
				309	// Apply rounding based on the bits shifted out of the
				310	// significand
				311	const uint64_t shift_half = UINT64_C(1) << (shift - 1);
				312	if (shift_out > shift_half \|\| (shift_out == shift_half && (new_significand & 1)))
				313	{
				314	new_significand += 1;
				315
				316	// Handle the case that the significand overflowed due
				317	// to rounding
				318	constexpr uint64_t max_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
				319	if (new_significand > max_significand)
				320	{
				321	new_significand = 0;
				322	new_exponent_bits++;
				323	}
				324	}
				325
				326	// Saturate or overflow if the value is larger than can be
				327	// represented in the output type. This can only occur if the
				328	// size of the exponent of the new type is not greater than the
				329	// exponent of the old type.
				330	if constexpr (out_exp_bits <= in_exp_bits)
				331	{
				332	constexpr int64_t inf_exp_bits = (INT64_C(1) << out_exp_bits) - 1;
				333	if (new_exponent_bits >= inf_exp_bits)
				334	{
				335	if constexpr (out_type::has_inf && overflow_mode == OverflowMode::Overflow)
				336	{
				337	// If the output type has a representation of
				338	// infinity, and we are in OVERFLOW Mode, then
				339	// return infinity.
				340	new_exponent_bits = inf_exp_bits;
				341	new_significand = 0;
				342	}
				343	else if constexpr (out_type::has_inf)
				344	{
				345	// If the output type has a representation of
				346	// infinity, and we are in SATURATE mode, then
				347	// return the largest representable real number.
				348	new_exponent_bits = inf_exp_bits - 1;
				349	new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
				350	}
				351	else if (new_exponent_bits > inf_exp_bits)
				352	{
				353	if constexpr (overflow_mode == OverflowMode::Overflow)
				354	return out_type::NaN();
				355	else
				356	return out_type::max(sign_bit);
				357	}
				358	else
				359	{
				360	constexpr uint64_t max_significand =
				361	(UINT64_C(1) << out_type::n_significand_bits) - (out_type::has_nan ? 2 : 1);
				362	if (new_significand > max_significand)
				363	{
				364	if constexpr (overflow_mode == OverflowMode::Saturate)
				365	new_significand = max_significand;
				366	else
				367	return out_type::NaN();
				368	}
				369	}
				370	}
				371	}
				372	}
				373
				374	return out_type::from_bits(sign_bit, new_exponent_bits, new_significand);
				375	}
				376	};
				377
				378	/// \brief Bit-accurate representation storage of IEEE754 compliant and
				379	/// derived floating point types.
				380	///
				381	/// Template parameters allow for specification of the number of bits, the
				382	/// number of exponent bits, and the features of the floating point types.
				383	/// The number of significand bits is `n_bits - n_exponent_bits - 1`. It is
				384	/// not possible to represent a signless type, such as FP8 E8M0.
				385	///
				386	/// For an imaginary 7-bit type, FP7 E4M2; the storage for various values
				387	/// given different floating point features is given below:
				388	///
				389	/// Value All features No infinity No features
				390	/// -------------------------- ------------ ----------- -----------
				391	/// Positive zero +0 00 0000 00 As before As before
				392	/// Negative zero -0 11 0000 00 As before As before
				393	/// Positive/negative infinity SS 1111 00 N/A N/A
				394	/// Signalling NaN SS 1111 01 SS 1111 11 N/A
				395	/// Quiet NaN SS 1111 11 N/A N/A
				396	/// Largest normal SS 1110 11 SS 1111 10 SS 1111 11
				397	/// Smallest normal SS 0001 00 As before SS 0000 01
				398	/// Largest denormal SS 0000 11 SS 0000 11 N/A
				399	///
				400	/// Note that the sign bit is extended to fill the storage type.
				401	template <size_t _n_bits, size_t n_exp_bits, FloatFeatures Feats = float_support::AllFeats>
				402	class cfloat_advanced
				403	{
				404	public:
				405	using storage_t = float_support::storage_type_t<float_support::get_storage_bytes(_n_bits)>;
				406
				407	static constexpr size_t n_bits = _n_bits;
				408	static constexpr size_t n_exponent_bits = n_exp_bits;
				409	static constexpr size_t n_significand_bits = n_bits - (1 + n_exp_bits);
				410	static constexpr int64_t exponent_bias = (INT64_C(1) << (n_exp_bits - 1)) - 1;
				411
				412	static constexpr FloatFeatures features = Feats;
				413	static constexpr bool has_nan = (Feats & FloatFeatures::HasNaN) != FloatFeatures::None;
				414	static constexpr bool has_inf = (Feats & FloatFeatures::HasInf) != FloatFeatures::None;
				415	static constexpr bool has_denorms = (Feats & FloatFeatures::HasDenorms) != FloatFeatures::None;
				416
				417	/// \brief Construct a floating point type with the given bit
				418	/// representation.
				419	static constexpr cfloat_advanced from_bits(storage_t bits)
				420	{
				421	return cfloat_advanced(float_support::hidden(), bits);
				422	}
				423
				424	/// \brief Construct a float from the given sign, exponent and
				425	/// significand bits.
				426	static constexpr cfloat_advanced from_bits(bool pm, storage_t e, storage_t s)
				427	{
				428	storage_t bits = pm ? -1 : 0;
				429
				430	bits <<= n_exp_bits;
				431	bits \|= e;
				432
				433	bits <<= n_significand_bits;
				434	if (has_denorms \|\| e)
				435	bits \|= s;
				436
				437	return cfloat_advanced(float_support::hidden(), bits);
				438	}
				439
				440	/// \brief (Hidden) Construct a float type from a given bit pattern
				441	constexpr cfloat_advanced(const float_support::hidden&, storage_t bits)
				442	: m_data(bits)
				443	{}
				444
				445	constexpr cfloat_advanced()
				446	: m_data(0)
				447	{}
				448	constexpr cfloat_advanced(const cfloat_advanced& other)
				449	: m_data(other.m_data)
				450	{}
				451
				452	constexpr cfloat_advanced& operator=(const cfloat_advanced& other)
				453	{
				454	this->m_data = other.m_data;
				455	return *this;
				456	}
				457
				458	constexpr cfloat_advanced& operator=(cfloat_advanced&& other)
				459	{
				460	this->m_data = other.m_data;
				461	return *this;
				462	}
				463
				464	/// \brief Get a NaN representation
				465	static constexpr cfloat_advanced NaN()
				466	{
				467	static_assert(has_nan);
				468
				469	// NaN is always encoded with all 1s in the exponent.
				470	// If Inf exists, then NaN is encoded as a non-zero significand; if
				471	// Inf doesn't exist then NaN is encoded as all ones in the
				472	// significand.
				473	constexpr uint64_t exp_bits = (UINT64_C(1) << n_exponent_bits) - 1;
				474	constexpr uint64_t sig_bits = has_inf ? 1 : (UINT64_C(1) << n_significand_bits) - 1;
				475	return cfloat_advanced::from_bits(false, exp_bits, sig_bits);
				476	}
				477
				478	/// \brief Get a representation of infinity
				479	static constexpr cfloat_advanced infinity(const bool& sign)
				480	{
				481	static_assert(has_inf);
				482
				483	// Inf is always encoded with all 1s in the exponent, and all zeros
				484	// in the significand.
				485	return cfloat_advanced::from_bits(sign, (UINT64_C(1) << n_exponent_bits) - 1, 0);
				486	}
				487
				488	/// \brief Get the largest representable value
				489	static constexpr cfloat_advanced max(const bool& sign)
				490	{
				491	if constexpr (has_nan && has_inf)
				492	{
				493	// Where we have NaN and Infinity, exponents all `1` corresponds
				494	// to some of these values.
				495	return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 2, (UINT64_C(1) << n_significand_bits) - 1);
				496	}
				497	else if constexpr (has_nan \|\| has_inf)
				498	{
				499	// Where we have either NaN or infinity (but not both),
				500	// exponents all `1` AND significand all `1` corresponds to the
				501	// special value.
				502	return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 2);
				503	}
				504	else
				505	{
				506	// With no special values to encode, the maximum value is
				507	// encoded as all `1`s.
				508	return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 1);
				509	}
				510	}
				511
				512	/// \brief Cast to a different floating point representation.
				513	template <size_t out_n_bits, size_t out_n_exp_bits, FloatFeatures OutFeats>
				514	constexpr inline operator cfloat_advanced<out_n_bits, out_n_exp_bits, OutFeats>() const
				515	{
				516	using out_type = cfloat_advanced<out_n_bits, out_n_exp_bits, OutFeats>;
				517	return cfloat_cast<cfloat_advanced, out_type>().operator()(*this);
				518	}
				519
				520	/// \brief Convert from a 32-bit floating point value
				521	BITCAST_CONSTEXPR
				522	cfloat_advanced(const float& f)
				523	{
				524	// If this format exactly represents the binary32 format then get
				525	// the bits from the provided float; otherwise get a binary32
				526	// representation and then convert to this format.
				527	if constexpr (represents_binary32())
				528	m_data = float_support::get_bits(f);
				529	else
				530	m_data =
				531	static_cast<cfloat_advanced<n_bits, n_exp_bits, Feats>>(static_cast<cfloat_advanced<32, 8>>(f)).m_data;
				532	}
				533
				534	/// \brief Cast to a 32-bit floating point value
				535	BITCAST_CONSTEXPR operator float() const
				536	{
				537	// If this format exactly represents the binary32 format then return
				538	// a float; otherwise get a binary32 representation and then return
				539	// a float.
				540	if constexpr (represents_binary32())
				541	return float_support::from_bits(m_data);
				542	else
				543	return static_cast<float>(this->operator cfloat_advanced<32, 8>());
				544	}
				545
				546	/// \brief Return whether this type represents the IEEE754 binary32
				547	/// format
				548	constexpr static inline bool represents_binary32()
				549	{
				550	return std::is_same_v<storage_t, int32_t> && n_exp_bits == 8 && Feats == float_support::AllFeats;
				551	}
				552
				553	constexpr auto operator-() const
				554	{
				555	constexpr storage_t sign_bits =
				556	static_cast<storage_t>(std::numeric_limits<std::make_unsigned_t<storage_t>>::max() << (n_bits - 1));
				557	return from_bits(m_data ^ sign_bits);
				558	}
				559
				560	constexpr bool is_subnormal() const
				561	{
				562	return exponent_bits() == 0 && significand() != 0;
				563	}
				564
				565	constexpr bool is_zero() const
				566	{
				567	return exponent_bits() == 0 && significand() == 0;
				568	}
				569
				570	constexpr bool is_nan() const
				571	{
				572	return has_nan && (exponent_bits() == (UINT64_C(1) << n_exponent_bits) - 1) &&
				573	((has_inf && significand()) \|\| (!has_inf && significand() == (UINT64_C(1) << n_significand_bits) - 1));
				574	}
				575
				576	constexpr bool is_infinity() const
				577	{
				578	return has_inf && ((exponent_bits() == (UINT64_C(1) << n_exponent_bits) - 1) && (significand() == 0));
				579	}
				580
				581	constexpr inline const storage_t& bits() const
				582	{
				583	return m_data;
				584	}
				585
				586	/// \brief Get the exponent
				587	constexpr inline int64_t exponent() const
				588	{
				589	return std::max<int64_t>(exponent_bits(), INT64_C(1)) - exponent_bias;
				590	}
				591
				592	/// \brief Get the sign bit
				593	constexpr inline bool sign() const
				594	{
				595	return (m_data >> (n_bits - 1)) & 0x1;
				596	}
				597
				598	/// \brief Get the bits from the exponent
				599	constexpr inline uint64_t exponent_bits() const
				600	{
				601	constexpr uint64_t mask = (UINT64_C(1) << n_exp_bits) - 1;
				602	return (m_data >> n_significand_bits) & mask;
				603	}
				604
				605	constexpr inline uint64_t significand() const
				606	{
				607	return m_data & ((UINT64_C(1) << n_significand_bits) - 1);
				608	}
				609
				610	constexpr inline bool operator==(const cfloat_advanced& other) const
				611	{
				612	return !is_nan() && !other.is_nan() && // Neither operand is NaN
				613	((is_zero() && other.is_zero()) \|\| (m_data == other.m_data));
				614	}
				615
				616	constexpr inline bool operator!=(const cfloat_advanced& other) const
				617	{
				618	return !(*this == other);
				619	}
				620
				621	constexpr inline cfloat_advanced& operator+=(const cfloat_advanced& rhs)
				622	{
				623	this->m_data = static_cast<cfloat_advanced>(static_cast<float>(*this) + static_cast<float>(rhs)).bits();
				624	return *this;
				625	}
				626
				627	private:
				628	storage_t m_data = 0;
				629	};
				630
				631	// This should probably be exported so we can use it elsewhere
				632	#undef BITCAST_CONSTEXPR
				633
				634	/// \brief Wrapper to maintain API compatibility with older code, which was
				635	/// limited to power-of-two sizes of floats.
				636	template <typename storage_t,
				637	size_t n_exp_bits,
				638	bool has_nan,
				639	bool with_denorm,
				640	bool with_infinity,
				641	std::enable_if_t<(n_exp_bits + 1 < sizeof(storage_t) * 8), bool> = true>
				642	using cfloat = cfloat_advanced<sizeof(storage_t) * 8,
				643	n_exp_bits,
				644	float_support::get_float_flags(has_nan, with_denorm, with_infinity)>;
				645
				namespace float_support
				{
				// Pre-C++23 these can't be computed as constexpr, so have to hardcode
				// them. Each table is keyed by the corresponding binary quantity
				// (`digits`, `min_exponent` or `max_exponent` of std::numeric_limits).

				template <int>
				struct digits10;    // floor(log10(2) * (digits - 1))
				template <int>
				struct max_digits10;    // ceil(log10(2) * digits + 1)
				template <int>
				struct min_exponent10;    // ceil(log10(2) * (min_exponent - 1))
				template <int>
				struct max_exponent10;    // floor(log10(2) * max_exponent)

				// 3 binary digits (2 significand bits, e.g. FP8 E5M2)
				template <>
				struct digits10<3>
				{
				    constexpr static inline int value = 0;
				};

				template <>
				struct max_digits10<3>
				{
				    constexpr static inline int value = 2;
				};

				// 4 binary digits (3 significand bits, e.g. FP8 E4M3)
				template <>
				struct digits10<4>
				{
				    constexpr static inline int value = 0;
				};

				template <>
				struct max_digits10<4>
				{
				    constexpr static inline int value = 3;
				};

				// 8 binary digits (7 significand bits, e.g. bfloat16)
				template <>
				struct digits10<8>
				{
				    constexpr static inline int value = 2;
				};

				template <>
				struct max_digits10<8>
				{
				    constexpr static inline int value = 4;
				};

				template <>
				struct digits10<10>
				{
				    constexpr static inline int value = 2;
				};

				template <>
				struct max_digits10<10>
				{
				    constexpr static inline int value = 5;
				};

				// 11 binary digits (10 significand bits, e.g. binary16)
				template <>
				struct digits10<11>
				{
				    constexpr static inline int value = 3;
				};

				template <>
				struct max_digits10<11>
				{
				    constexpr static inline int value = 5;
				};

				// 24 binary digits (23 significand bits, e.g. binary32)
				template <>
				struct digits10<24>
				{
				    constexpr static inline int value = 6;
				};

				template <>
				struct max_digits10<24>
				{
				    constexpr static inline int value = 9;
				};

				// 4 exponent bits (bias 7)
				template <>
				struct min_exponent10<-5>
				{
				    constexpr static inline int value = -1;
				};

				template <>
				struct max_exponent10<8>
				{
				    constexpr static inline int value = 2;
				};

				// 5 exponent bits (bias 15). The smallest normal value is 2^-14
				// (about 6.1e-5), so 10^-4 is the smallest normal power of ten.
				template <>
				struct min_exponent10<-13>
				{
				    constexpr static inline int value = -4;
				};

				template <>
				struct max_exponent10<16>
				{
				    constexpr static inline int value = 4;
				};

				// 8 exponent bits (bias 127)
				template <>
				struct min_exponent10<-125>
				{
				    constexpr static inline int value = -37;
				};

				template <>
				struct max_exponent10<128>
				{
				    constexpr static inline int value = 38;
				};

				template <int d>
				inline constexpr int digits10_v = digits10<d>::value;
				template <int d>
				inline constexpr int max_digits10_v = max_digits10<d>::value;

				template <int e>
				inline constexpr int min_exponent10_v = min_exponent10<e>::value;

				template <int e>
				inline constexpr int max_exponent10_v = max_exponent10<e>::value;

				}    // namespace float_support
				732
				733	} // namespace ct
				734
				735	namespace std
				736	{
				737
				738	template <size_t n_bits, size_t n_exp_bits, ct::FloatFeatures Feats>
				739	struct is_floating_point<ct::cfloat_advanced<n_bits, n_exp_bits, Feats>> : std::integral_constant<bool, true>
				740	{};
				741
				742	template <size_t n_bits, size_t n_exp_bits, ct::FloatFeatures Feats>
				743	class numeric_limits<ct::cfloat_advanced<n_bits, n_exp_bits, Feats>>
				744	{
				745	using this_cfloat = ct::cfloat_advanced<n_bits, n_exp_bits, Feats>;
				746
				747	public:
				748	static constexpr bool is_specialized = true;
				749
				750	static constexpr auto min() noexcept
				751	{
				752	return this_cfloat::from_bits(false, 1, 0);
				753	}
				754
				755	static constexpr auto max() noexcept
				756	{
				757	return this_cfloat::max(false);
				758	}
				759	static constexpr auto lowest() noexcept
				760	{
				761	return -max();
				762	}
				763
				764	static constexpr int digits = this_cfloat::n_significand_bits + 1;
				765	static constexpr int digits10 = ct::float_support::digits10_v<digits>;
				766	static constexpr int max_digits10 = ct::float_support::max_digits10_v<digits>;
				767
				768	static constexpr bool is_signed = true;
				769	static constexpr bool is_integer = false;
				770	static constexpr bool is_exact = false;
				771	static constexpr int radix = 2;
				772
				773	static constexpr auto epsilon() noexcept
				774	{
				775	return this_cfloat::from_bits(false, this_cfloat::exponent_bias - this_cfloat::n_significand_bits, 0);
				776	}
				777
				778	static constexpr auto round_error() noexcept
				779	{
				780	return this_cfloat::from_bits(0, this_cfloat::exponent_bias - 1, 0);
				781	}
				782
				783	static constexpr int min_exponent = (1 - this_cfloat::exponent_bias) + 1;
				784	static constexpr int min_exponent10 = ct::float_support::min_exponent10_v<min_exponent>;
				785	static constexpr int max_exponent = this_cfloat::exponent_bias + 1;
				786	static constexpr int max_exponent10 = ct::float_support::max_exponent10_v<max_exponent>;
				787
				788	static constexpr bool has_infinity = this_cfloat::has_inf;
				789	static constexpr bool has_quiet_NaN = this_cfloat::has_nan && this_cfloat::has_inf;
				790	static constexpr bool has_signaling_NaN = this_cfloat::has_nan;
				791	static constexpr float_denorm_style has_denorm = this_cfloat::has_denorms ? denorm_present : denorm_absent;
				792	static constexpr bool has_denorm_loss = false;
				793
				794	static constexpr auto infinity() noexcept
				795	{
				796	if constexpr (this_cfloat::has_inf)
				797	{
				798	return this_cfloat::infinity(false);
				799	}
				800	else
				801	{
				802	return this_cfloat::from_bits(false, 0, 0);
				803	}
				804	}
				805
				806	static constexpr auto quiet_NaN() noexcept
				807	{
				808	const uint64_t exp_bits = (UINT64_C(1) << this_cfloat::n_exponent_bits) - 1;
				809	const uint64_t sig_bits = this_cfloat::has_inf ? (UINT64_C(1) << (this_cfloat::n_significand_bits - 1)) \| 1
				810	: (UINT64_C(1) << this_cfloat::n_significand_bits) - 1;
				811	return this_cfloat::from_bits(false, exp_bits, sig_bits);
				812	}
				813
				814	static constexpr auto signaling_NaN() noexcept
				815	{
				816	const uint64_t exp_bits = (UINT64_C(1) << this_cfloat::n_exponent_bits) - 1;
				817	const uint64_t sig_bits = this_cfloat::has_inf ? 1 : (UINT64_C(1) << this_cfloat::n_significand_bits) - 1;
				818	return this_cfloat::from_bits(false, exp_bits, sig_bits);
				819	}
				820
				821	static constexpr auto denorm_min() noexcept
				822	{
				823	return this_cfloat::from_bits(false, 0, 1);
				824	}
				825
				826	static constexpr bool is_iec559 = false;
				827	static constexpr bool is_bounded = false;
				828	static constexpr bool is_modulo = false;
				829
				830	static constexpr bool traps = false;
				831	static constexpr bool tinyness_before = false;
				832	static constexpr float_round_style round_style = round_to_nearest;
				833	};
				834
				835	} // namespace std
				836
				837	#endif // CT_CFLOAT_H