diff options
| -rw-r--r-- | library/core/src/num/dec2flt/float.rs | 272 | ||||
| -rw-r--r-- | library/core/src/num/dec2flt/lemire.rs | 18 | ||||
| -rw-r--r-- | library/core/src/num/dec2flt/mod.rs | 6 | ||||
| -rw-r--r-- | library/core/src/num/dec2flt/slow.rs | 18 | ||||
| -rw-r--r-- | library/coretests/tests/num/dec2flt/float.rs | 40 | ||||
| -rw-r--r-- | library/coretests/tests/num/dec2flt/lemire.rs | 6 | ||||
| -rw-r--r-- | library/coretests/tests/num/dec2flt/parse.rs | 15 | ||||
| -rw-r--r-- | src/etc/test-float-parse/src/traits.rs | 6 |
8 files changed, 267 insertions, 114 deletions
diff --git a/library/core/src/num/dec2flt/float.rs b/library/core/src/num/dec2flt/float.rs index da57aa9a546..b8a28a67569 100644 --- a/library/core/src/num/dec2flt/float.rs +++ b/library/core/src/num/dec2flt/float.rs @@ -1,14 +1,57 @@ //! Helper trait for generic float types. +use core::f64; + use crate::fmt::{Debug, LowerExp}; use crate::num::FpCategory; -use crate::ops::{Add, Div, Mul, Neg}; +use crate::ops::{self, Add, Div, Mul, Neg}; + +/// Lossy `as` casting between two types. +pub trait CastInto<T: Copy>: Copy { + fn cast(self) -> T; +} + +/// Collection of traits that allow us to be generic over integer size. +pub trait Integer: + Sized + + Clone + + Copy + + Debug + + ops::Shr<u32, Output = Self> + + ops::Shl<u32, Output = Self> + + ops::BitAnd<Output = Self> + + ops::BitOr<Output = Self> + + PartialEq + + CastInto<i16> +{ + const ZERO: Self; + const ONE: Self; +} + +macro_rules! int { + ($($ty:ty),+) => { + $( + impl CastInto<i16> for $ty { + fn cast(self) -> i16 { + self as i16 + } + } + + impl Integer for $ty { + const ZERO: Self = 0; + const ONE: Self = 1; + } + )+ + } +} + +int!(u32, u64); -/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`. +/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats. /// /// See the parent module's doc comment for why this is necessary. /// -/// Should **never ever** be implemented for other types or be used outside the dec2flt module. +/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module. #[doc(hidden)] pub trait RawFloat: Sized @@ -24,62 +67,107 @@ pub trait RawFloat: + Copy + Debug { + /// The unsigned integer with the same size as the float + type Int: Integer + Into<u64>; + + /* general constants */ + const INFINITY: Self; const NEG_INFINITY: Self; const NAN: Self; const NEG_NAN: Self; - /// The number of bits in the significand, *excluding* the hidden bit. - const MANTISSA_EXPLICIT_BITS: usize; - - // Round-to-even only happens for negative values of q - // when q ≥ −4 in the 64-bit case and when q ≥ −17 in - // the 32-bitcase. - // - // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we - // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have - // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10. - // - // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64 - // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case) - // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64 - // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11 - // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase). - // - // Thus we have that we only need to round ties to even when - // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10] - // (in the 32-bit case). In both cases,the power of five(5^|q|) - // fits in a 64-bit word. - const MIN_EXPONENT_ROUND_TO_EVEN: i32; - const MAX_EXPONENT_ROUND_TO_EVEN: i32; + /// Bit width of the float + const BITS: u32; - // Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋` - const MIN_EXPONENT_FAST_PATH: i64; + /// The number of bits in the significand, *including* the hidden bit. + const SIG_TOTAL_BITS: u32; - // Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋` - const MAX_EXPONENT_FAST_PATH: i64; + const EXP_MASK: Self::Int; + const SIG_MASK: Self::Int; - // Maximum exponent that can be represented for a disguised-fast path case. - // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋` - const MAX_EXPONENT_DISGUISED_FAST_PATH: i64; - - // Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`. - const MINIMUM_EXPONENT: i32; + /// The number of bits in the significand, *excluding* the hidden bit. + const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1; + + /// Number of bits in the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This shifted fully right, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// Signed version of `EXP_SAT` since we convert a lot. + const INFINITE_POWER: i32 = Self::EXP_SAT as i32; + + /// The exponent bias value. This is also the maximum value of the exponent. + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// Minimum exponent value of normal values. + const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1); + + /// Round-to-even only happens for negative values of q + /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in + /// the 32-bitcase. + /// + /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we + /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have + /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10. + /// + /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64 + /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case) + /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64 + /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11 + /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase). + /// + /// Thus we have that we only need to round ties to even when + /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10] + /// (in the 32-bit case). In both cases,the power of five(5^|q|) + /// fits in a 64-bit word. + const MIN_EXPONENT_ROUND_TO_EVEN: i32; + const MAX_EXPONENT_ROUND_TO_EVEN: i32; - // Largest exponent value `(1 << EXP_BITS) - 1`. - const INFINITE_POWER: i32; + /* limits related to Fast pathing */ + + /// Largest decimal exponent for a non-infinite value. + /// + /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast + /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity. + const LARGEST_POWER_OF_TEN: i32 = { + let largest_pow2 = Self::EXP_BIAS + 1; + pow2_to_pow10(largest_pow2 as i64) as i32 + }; + + /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything + /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero. + /// + /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is + /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values + /// that are representable by the intermediate storage format. I don't actually know _why_ + /// the storage format is relevant here. + /// + /// The values may be calculated using the formula. Unfortunately we cannot calculate them at + /// compile time since intermediates exceed the range of an `f64`. + const SMALLEST_POWER_OF_TEN: i32; - // Index (in bits) of the sign. - const SIGN_INDEX: usize; + /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋` + // assuming FLT_EVAL_METHOD = 0 + const MAX_EXPONENT_FAST_PATH: i64 = { + let log2_5 = f64::consts::LOG2_10 - 1.0; + (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64 + }; - // Smallest decimal exponent for a non-zero value. - const SMALLEST_POWER_OF_TEN: i32; + /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋` + const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH; - // Largest decimal exponent for a non-infinite value. - const LARGEST_POWER_OF_TEN: i32; + /// Maximum exponent that can be represented for a disguised-fast path case. + /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋` + const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = + Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64; - // Maximum mantissa for the fast-path (`1 << 53` for f64). - const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS; + /// Maximum mantissa for the fast-path (`1 << 53` for f64). + const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS; /// Converts integer into float through an as cast. /// This is only called in the fast-path algorithm, and therefore @@ -96,27 +184,51 @@ pub trait RawFloat: /// Returns the category that this number falls into. fn classify(self) -> FpCategory; + /// Transmute to the integer representation + fn to_bits(self) -> Self::Int; + /// Returns the mantissa, exponent and sign as integers. - fn integer_decode(self) -> (u64, i16, i8); + /// + /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float. + /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the + /// minimum subnormal power. + fn integer_decode(self) -> (u64, i16, i8) { + let bits = self.to_bits(); + let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 }; + let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast(); + let mantissa = if exponent == 0 { + (bits & Self::SIG_MASK) << 1 + } else { + (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS) + }; + // Exponent bias + mantissa shift + exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16; + (mantissa.into(), exponent, sign) + } +} + +/// Solve for `b` in `10^b = 2^a` +const fn pow2_to_pow10(a: i64) -> i64 { + let res = (a as f64) / f64::consts::LOG2_10; + res as i64 } impl RawFloat for f32 { + type Int = u32; + const INFINITY: Self = f32::INFINITY; const NEG_INFINITY: Self = f32::NEG_INFINITY; const NAN: Self = f32::NAN; const NEG_NAN: Self = -f32::NAN; - const MANTISSA_EXPLICIT_BITS: usize = 23; + const BITS: u32 = 32; + const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; + const EXP_MASK: Self::Int = Self::EXP_MASK; + const SIG_MASK: Self::Int = Self::MAN_MASK; + const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17; const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10; - const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0 - const MAX_EXPONENT_FAST_PATH: i64 = 10; - const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17; - const MINIMUM_EXPONENT: i32 = -127; - const INFINITE_POWER: i32 = 0xFF; - const SIGN_INDEX: usize = 31; const SMALLEST_POWER_OF_TEN: i32 = -65; - const LARGEST_POWER_OF_TEN: i32 = 38; #[inline] fn from_u64(v: u64) -> Self { @@ -136,16 +248,8 @@ impl RawFloat for f32 { TABLE[exponent & 15] } - /// Returns the mantissa, exponent and sign as integers. - fn integer_decode(self) -> (u64, i16, i8) { - let bits = self.to_bits(); - let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 }; - let mut exponent: i16 = ((bits >> 23) & 0xff) as i16; - let mantissa = - if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 }; - // Exponent bias + mantissa shift - exponent -= 127 + 23; - (mantissa as u64, exponent, sign) + fn to_bits(self) -> Self::Int { + self.to_bits() } fn classify(self) -> FpCategory { @@ -154,22 +258,21 @@ impl RawFloat for f32 { } impl RawFloat for f64 { - const INFINITY: Self = f64::INFINITY; - const NEG_INFINITY: Self = f64::NEG_INFINITY; - const NAN: Self = f64::NAN; - const NEG_NAN: Self = -f64::NAN; + type Int = u64; + + const INFINITY: Self = Self::INFINITY; + const NEG_INFINITY: Self = Self::NEG_INFINITY; + const NAN: Self = Self::NAN; + const NEG_NAN: Self = -Self::NAN; + + const BITS: u32 = 64; + const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; + const EXP_MASK: Self::Int = Self::EXP_MASK; + const SIG_MASK: Self::Int = Self::MAN_MASK; - const MANTISSA_EXPLICIT_BITS: usize = 52; const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4; const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23; - const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0 - const MAX_EXPONENT_FAST_PATH: i64 = 22; - const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37; - const MINIMUM_EXPONENT: i32 = -1023; - const INFINITE_POWER: i32 = 0x7FF; - const SIGN_INDEX: usize = 63; const SMALLEST_POWER_OF_TEN: i32 = -342; - const LARGEST_POWER_OF_TEN: i32 = 308; #[inline] fn from_u64(v: u64) -> Self { @@ -190,19 +293,8 @@ impl RawFloat for f64 { TABLE[exponent & 31] } - /// Returns the mantissa, exponent and sign as integers. - fn integer_decode(self) -> (u64, i16, i8) { - let bits = self.to_bits(); - let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 }; - let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16; - let mantissa = if exponent == 0 { - (bits & 0xfffffffffffff) << 1 - } else { - (bits & 0xfffffffffffff) | 0x10000000000000 - }; - // Exponent bias + mantissa shift - exponent -= 1023 + 52; - (mantissa, exponent, sign) + fn to_bits(self) -> Self::Int { + self.to_bits() } fn classify(self) -> FpCategory { diff --git a/library/core/src/num/dec2flt/lemire.rs b/library/core/src/num/dec2flt/lemire.rs index 0cc39cb9b62..f84929a03c1 100644 --- a/library/core/src/num/dec2flt/lemire.rs +++ b/library/core/src/num/dec2flt/lemire.rs @@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp { // Normalize our significant digits, so the most-significant bit is set. let lz = w.leading_zeros(); w <<= lz; - let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3); + let (lo, hi) = compute_product_approx(q, w, F::SIG_BITS as usize + 3); if lo == 0xFFFF_FFFF_FFFF_FFFF { // If we have failed to approximate w x 5^-q with our 128-bit value. // Since the addition of 1 could lead to an overflow which could then @@ -61,8 +61,8 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp { } } let upperbit = (hi >> 63) as i32; - let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3); - let mut power2 = power(q as i32) + upperbit - lz as i32 - F::MINIMUM_EXPONENT; + let mut mantissa = hi >> (upperbit + 64 - F::SIG_BITS as i32 - 3); + let mut power2 = power(q as i32) + upperbit - lz as i32 - F::EXP_MIN + 1; if power2 <= 0 { if -power2 + 1 >= 64 { // Have more than 64 bits below the minimum exponent, must be 0. @@ -72,7 +72,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp { mantissa >>= -power2 + 1; mantissa += mantissa & 1; mantissa >>= 1; - power2 = (mantissa >= (1_u64 << F::MANTISSA_EXPLICIT_BITS)) as i32; + power2 = (mantissa >= (1_u64 << F::SIG_BITS)) as i32; return BiasedFp { m: mantissa, p_biased: power2 }; } // Need to handle rounding ties. Normally, we need to round up, @@ -89,8 +89,8 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp { if lo <= 1 && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64 && q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64 - && mantissa & 3 == 1 - && (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi + && mantissa & 0b11 == 0b01 + && (mantissa << (upperbit + 64 - F::SIG_BITS as i32 - 3)) == hi { // Zero the lowest bit, so we don't round up. mantissa &= !1_u64; @@ -98,15 +98,15 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp { // Round-to-even, then shift the significant digits into place. mantissa += mantissa & 1; mantissa >>= 1; - if mantissa >= (2_u64 << F::MANTISSA_EXPLICIT_BITS) { + if mantissa >= (2_u64 << F::SIG_BITS) { // Rounding up overflowed, so the carry bit is set. Set the // mantissa to 1 (only the implicit, hidden bit is set) and // increase the exponent. - mantissa = 1_u64 << F::MANTISSA_EXPLICIT_BITS; + mantissa = 1_u64 << F::SIG_BITS; power2 += 1; } // Zero out the hidden bit. - mantissa &= !(1_u64 << F::MANTISSA_EXPLICIT_BITS); + mantissa &= !(1_u64 << F::SIG_BITS); if power2 >= F::INFINITE_POWER { // Exponent is above largest normal value, must be infinite. return fp_inf; diff --git a/library/core/src/num/dec2flt/mod.rs b/library/core/src/num/dec2flt/mod.rs index 9d1989c4b81..d1a0e1db313 100644 --- a/library/core/src/num/dec2flt/mod.rs +++ b/library/core/src/num/dec2flt/mod.rs @@ -232,10 +232,10 @@ pub fn pfe_invalid() -> ParseFloatError { } /// Converts a `BiasedFp` to the closest machine float type. -fn biased_fp_to_float<T: RawFloat>(x: BiasedFp) -> T { +fn biased_fp_to_float<F: RawFloat>(x: BiasedFp) -> F { let mut word = x.m; - word |= (x.p_biased as u64) << T::MANTISSA_EXPLICIT_BITS; - T::from_u64_bits(word) + word |= (x.p_biased as u64) << F::SIG_BITS; + F::from_u64_bits(word) } /// Converts a decimal string into a floating point number. diff --git a/library/core/src/num/dec2flt/slow.rs b/library/core/src/num/dec2flt/slow.rs index e0c4ae117da..3baed426523 100644 --- a/library/core/src/num/dec2flt/slow.rs +++ b/library/core/src/num/dec2flt/slow.rs @@ -74,36 +74,36 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp { } // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. exp2 -= 1; - while (F::MINIMUM_EXPONENT + 1) > exp2 { - let mut n = ((F::MINIMUM_EXPONENT + 1) - exp2) as usize; + while F::EXP_MIN > exp2 { + let mut n = (F::EXP_MIN - exp2) as usize; if n > MAX_SHIFT { n = MAX_SHIFT; } d.right_shift(n); exp2 += n as i32; } - if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { + if (exp2 - F::EXP_MIN + 1) >= F::INFINITE_POWER { return fp_inf; } // Shift the decimal to the hidden bit, and then round the value // to get the high mantissa+1 bits. - d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1); + d.left_shift(F::SIG_BITS as usize + 1); let mut mantissa = d.round(); - if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) { + if mantissa >= (1_u64 << (F::SIG_BITS + 1)) { // Rounding up overflowed to the carry bit, need to // shift back to the hidden bit. d.right_shift(1); exp2 += 1; mantissa = d.round(); - if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { + if (exp2 - F::EXP_MIN + 1) >= F::INFINITE_POWER { return fp_inf; } } - let mut power2 = exp2 - F::MINIMUM_EXPONENT; - if mantissa < (1_u64 << F::MANTISSA_EXPLICIT_BITS) { + let mut power2 = exp2 - F::EXP_MIN + 1; + if mantissa < (1_u64 << F::SIG_BITS) { power2 -= 1; } // Zero out all the bits above the explicit mantissa bits. - mantissa &= (1_u64 << F::MANTISSA_EXPLICIT_BITS) - 1; + mantissa &= (1_u64 << F::SIG_BITS) - 1; BiasedFp { m: mantissa, p_biased: power2 } } diff --git a/library/coretests/tests/num/dec2flt/float.rs b/library/coretests/tests/num/dec2flt/float.rs index 40f0776e021..b5afd3e3b24 100644 --- a/library/coretests/tests/num/dec2flt/float.rs +++ b/library/coretests/tests/num/dec2flt/float.rs @@ -31,3 +31,43 @@ fn test_f64_integer_decode() { let (nan_m, nan_p, _nan_s) = f64::NAN.integer_decode(); assert_eq!((nan_m, nan_p), (6755399441055744, 972)); } + +/* Sanity checks of computed magic numbers */ + +#[test] +fn test_f32_consts() { + assert_eq!(<f32 as RawFloat>::INFINITY, f32::INFINITY); + assert_eq!(<f32 as RawFloat>::NEG_INFINITY, -f32::INFINITY); + assert_eq!(<f32 as RawFloat>::NAN.to_bits(), f32::NAN.to_bits()); + assert_eq!(<f32 as RawFloat>::NEG_NAN.to_bits(), (-f32::NAN).to_bits()); + assert_eq!(<f32 as RawFloat>::SIG_BITS, 23); + assert_eq!(<f32 as RawFloat>::MIN_EXPONENT_ROUND_TO_EVEN, -17); + assert_eq!(<f32 as RawFloat>::MAX_EXPONENT_ROUND_TO_EVEN, 10); + assert_eq!(<f32 as RawFloat>::MIN_EXPONENT_FAST_PATH, -10); + assert_eq!(<f32 as RawFloat>::MAX_EXPONENT_FAST_PATH, 10); + assert_eq!(<f32 as RawFloat>::MAX_EXPONENT_DISGUISED_FAST_PATH, 17); + assert_eq!(<f32 as RawFloat>::EXP_MIN, -126); + assert_eq!(<f32 as RawFloat>::EXP_SAT, 0xff); + assert_eq!(<f32 as RawFloat>::SMALLEST_POWER_OF_TEN, -65); + assert_eq!(<f32 as RawFloat>::LARGEST_POWER_OF_TEN, 38); + assert_eq!(<f32 as RawFloat>::MAX_MANTISSA_FAST_PATH, 16777216); +} + +#[test] +fn test_f64_consts() { + assert_eq!(<f64 as RawFloat>::INFINITY, f64::INFINITY); + assert_eq!(<f64 as RawFloat>::NEG_INFINITY, -f64::INFINITY); + assert_eq!(<f64 as RawFloat>::NAN.to_bits(), f64::NAN.to_bits()); + assert_eq!(<f64 as RawFloat>::NEG_NAN.to_bits(), (-f64::NAN).to_bits()); + assert_eq!(<f64 as RawFloat>::SIG_BITS, 52); + assert_eq!(<f64 as RawFloat>::MIN_EXPONENT_ROUND_TO_EVEN, -4); + assert_eq!(<f64 as RawFloat>::MAX_EXPONENT_ROUND_TO_EVEN, 23); + assert_eq!(<f64 as RawFloat>::MIN_EXPONENT_FAST_PATH, -22); + assert_eq!(<f64 as RawFloat>::MAX_EXPONENT_FAST_PATH, 22); + assert_eq!(<f64 as RawFloat>::MAX_EXPONENT_DISGUISED_FAST_PATH, 37); + assert_eq!(<f64 as RawFloat>::EXP_MIN, -1022); + assert_eq!(<f64 as RawFloat>::EXP_SAT, 0x7ff); + assert_eq!(<f64 as RawFloat>::SMALLEST_POWER_OF_TEN, -342); + assert_eq!(<f64 as RawFloat>::LARGEST_POWER_OF_TEN, 308); + assert_eq!(<f64 as RawFloat>::MAX_MANTISSA_FAST_PATH, 9007199254740992); +} diff --git a/library/coretests/tests/num/dec2flt/lemire.rs b/library/coretests/tests/num/dec2flt/lemire.rs index 9f228a25e46..0db80fbd525 100644 --- a/library/coretests/tests/num/dec2flt/lemire.rs +++ b/library/coretests/tests/num/dec2flt/lemire.rs @@ -1,3 +1,4 @@ +use core::num::dec2flt::float::RawFloat; use core::num::dec2flt::lemire::compute_float; fn compute_float32(q: i64, w: u64) -> (i32, u64) { @@ -27,6 +28,11 @@ fn compute_float_f32_rounding() { // Let's check the lines to see if anything is different in table... assert_eq!(compute_float32(-10, 167772190000000000), (151, 2)); assert_eq!(compute_float32(-10, 167772200000000000), (151, 2)); + + // Check the rounding point between infinity and the next representable number down + assert_eq!(compute_float32(38, 3), (f32::INFINITE_POWER - 1, 6402534)); + assert_eq!(compute_float32(38, 4), (f32::INFINITE_POWER, 0)); // infinity + assert_eq!(compute_float32(20, 3402823470000000000), (f32::INFINITE_POWER - 1, 8388607)); } #[test] diff --git a/library/coretests/tests/num/dec2flt/parse.rs b/library/coretests/tests/num/dec2flt/parse.rs index ec705a61e13..59be3915052 100644 --- a/library/coretests/tests/num/dec2flt/parse.rs +++ b/library/coretests/tests/num/dec2flt/parse.rs @@ -58,6 +58,21 @@ macro_rules! assert_float_result_bits_eq { } #[test] +fn regression() { + // These showed up in fuzz tests when the minimum exponent was incorrect. + assert_float_result_bits_eq!( + 0x0, + f64, + "3313756768023998018398807867233977556112078681253148176737587500333136120852692315608454494981109839693784033457129423181787087843504060087613228932431e-475" + ); + assert_float_result_bits_eq!( + 0x0, + f64, + "5298127456259331337220.92759278003098321644501973966679724599271041396379712108366679824365568578569680024083293475291869842408884554511641179110778276695274832779269225510492006696321279587846006535230380114430977056662212751544508159333199129106162019382177820713609e-346" + ); +} + +#[test] fn issue31109() { // Regression test for #31109. // Ensure the test produces a valid float with the expected bit pattern. diff --git a/src/etc/test-float-parse/src/traits.rs b/src/etc/test-float-parse/src/traits.rs index f5333d63b36..f7f5f5dd35b 100644 --- a/src/etc/test-float-parse/src/traits.rs +++ b/src/etc/test-float-parse/src/traits.rs @@ -147,12 +147,12 @@ pub trait Float: } macro_rules! impl_float { - ($($fty:ty, $ity:ty, $bits:literal);+) => { + ($($fty:ty, $ity:ty);+) => { $( impl Float for $fty { type Int = $ity; type SInt = <Self::Int as Int>::Signed; - const BITS: u32 = $bits; + const BITS: u32 = <$ity>::BITS; const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1; const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE; const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1); @@ -168,7 +168,7 @@ macro_rules! impl_float { } } -impl_float!(f32, u32, 32; f64, u64, 64); +impl_float!(f32, u32; f64, u64); /// A test generator. Should provide an iterator that produces unique patterns to parse. /// |
