From c82da7a54b9efb1a0ccbe11de66c71f547bf7db9 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Sun, 19 Apr 2015 14:19:54 +0900 Subject: core: added core::num::flt2dec for floating-point formatting. This is a fork of the flt2dec portion of rust-strconv [1] with a necessary relicensing (the original code was licensed CC0-1.0). Each module is accompanied with large unit tests, integrated in this commit as coretest::num::flt2dec. This module is added in order to replace the existing core::fmt::float method. The forked revision of rust-strconv is from 2015-04-20, with a commit ID 9adf6d3571c6764a6f240a740c823024f70dc1c7. [1] https://github.com/lifthrasiir/rust-strconv/ --- src/libcore/num/flt2dec/bignum.rs | 355 ++++++++++++++ src/libcore/num/flt2dec/decoder.rs | 100 ++++ src/libcore/num/flt2dec/estimator.rs | 23 + src/libcore/num/flt2dec/mod.rs | 662 +++++++++++++++++++++++++ src/libcore/num/flt2dec/strategy/dragon.rs | 327 +++++++++++++ src/libcore/num/flt2dec/strategy/grisu.rs | 749 +++++++++++++++++++++++++++++ src/libcore/num/mod.rs | 3 + 7 files changed, 2219 insertions(+) create mode 100644 src/libcore/num/flt2dec/bignum.rs create mode 100644 src/libcore/num/flt2dec/decoder.rs create mode 100644 src/libcore/num/flt2dec/estimator.rs create mode 100644 src/libcore/num/flt2dec/mod.rs create mode 100644 src/libcore/num/flt2dec/strategy/dragon.rs create mode 100644 src/libcore/num/flt2dec/strategy/grisu.rs (limited to 'src/libcore/num') diff --git a/src/libcore/num/flt2dec/bignum.rs b/src/libcore/num/flt2dec/bignum.rs new file mode 100644 index 00000000000..449d080dc5c --- /dev/null +++ b/src/libcore/num/flt2dec/bignum.rs @@ -0,0 +1,355 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Custom arbitrary-precision number (bignum) implementation. +//! +//! This is designed to avoid the heap allocation at expense of stack memory. +//! The most used bignum type, `Big32x36`, is limited by 32 × 36 = 1,152 bits +//! and will take at most 152 bytes of stack memory. This is (barely) enough +//! for handling all possible finite `f64` values. +//! +//! In principle it is possible to have multiple bignum types for different +//! inputs, but we don't do so to avoid the code bloat. Each bignum is still +//! tracked for the actual usages, so it normally doesn't matter. + +#![macro_use] + +use prelude::*; +use mem; +use intrinsics; + +/// Arithmetic operations required by bignums. +pub trait FullOps { + /// Returns `(carry', v')` such that `carry' * 2^W + v' = self + other + carry`, + /// where `W` is the number of bits in `Self`. + fn full_add(self, other: Self, carry: bool) -> (bool /*carry*/, Self); + + /// Returns `(carry', v')` such that `carry' * 2^W + v' = self * other + carry`, + /// where `W` is the number of bits in `Self`. + fn full_mul(self, other: Self, carry: Self) -> (Self /*carry*/, Self); + + /// Returns `(carry', v')` such that `carry' * 2^W + v' = self * other + other2 + carry`, + /// where `W` is the number of bits in `Self`. + fn full_mul_add(self, other: Self, other2: Self, carry: Self) -> (Self /*carry*/, Self); + + /// Returns `(quo, rem)` such that `borrow * 2^W + self = quo * other + rem` + /// and `0 <= rem < other`, where `W` is the number of bits in `Self`. + fn full_div_rem(self, other: Self, borrow: Self) -> (Self /*quotient*/, Self /*remainder*/); +} + +macro_rules! impl_full_ops { + ($($ty:ty: add($addfn:path), mul/div($bigty:ident);)*) => ( + $( + impl FullOps for $ty { + fn full_add(self, other: $ty, carry: bool) -> (bool, $ty) { + // this cannot overflow, the output is between 0 and 2*2^nbits - 1 + // FIXME will LLVM optimize this into ADC or similar??? + let (v, carry1) = unsafe { $addfn(self, other) }; + let (v, carry2) = unsafe { $addfn(v, if carry {1} else {0}) }; + (carry1 || carry2, v) + } + + fn full_mul(self, other: $ty, carry: $ty) -> ($ty, $ty) { + // this cannot overflow, the output is between 0 and 2^nbits * (2^nbits - 1) + let nbits = mem::size_of::<$ty>() * 8; + let v = (self as $bigty) * (other as $bigty) + (carry as $bigty); + ((v >> nbits) as $ty, v as $ty) + } + + fn full_mul_add(self, other: $ty, other2: $ty, carry: $ty) -> ($ty, $ty) { + // this cannot overflow, the output is between 0 and 2^(2*nbits) - 1 + let nbits = mem::size_of::<$ty>() * 8; + let v = (self as $bigty) * (other as $bigty) + (other2 as $bigty) + + (carry as $bigty); + ((v >> nbits) as $ty, v as $ty) + } + + fn full_div_rem(self, other: $ty, borrow: $ty) -> ($ty, $ty) { + debug_assert!(borrow < other); + // this cannot overflow, the dividend is between 0 and other * 2^nbits - 1 + let nbits = mem::size_of::<$ty>() * 8; + let lhs = ((borrow as $bigty) << nbits) | (self as $bigty); + let rhs = other as $bigty; + ((lhs / rhs) as $ty, (lhs % rhs) as $ty) + } + } + )* + ) +} + +impl_full_ops! { + u8: add(intrinsics::u8_add_with_overflow), mul/div(u16); + u16: add(intrinsics::u16_add_with_overflow), mul/div(u32); + u32: add(intrinsics::u32_add_with_overflow), mul/div(u64); +// u64: add(intrinsics::u64_add_with_overflow), mul/div(u128); // damn! +} + +macro_rules! define_bignum { + ($name:ident: type=$ty:ty, n=$n:expr) => ( + /// Stack-allocated arbitrary-precision (up to certain limit) integer. + /// + /// This is backed by an fixed-size array of given type ("digit"). + /// While the array is not very large (normally some hundred bytes), + /// copying it recklessly may result in the performance hit. + /// Thus this is intentionally not `Copy`. + /// + /// All operations available to bignums panic in the case of over/underflows. + /// The caller is responsible to use large enough bignum types. + pub struct $name { + /// One plus the offset to the maximum "digit" in the use. + /// This does not decrease, so be aware of the computation order. + /// `base[size..]` should be zero. + size: usize, + /// Digits. `[a, b, c, ...]` represents `a + b*n + c*n^2 + ...`. + base: [$ty; $n] + } + + impl $name { + /// Makes a bignum from one digit. + pub fn from_small(v: $ty) -> $name { + let mut base = [0; $n]; + base[0] = v; + $name { size: 1, base: base } + } + + /// Makes a bignum from `u64` value. + pub fn from_u64(mut v: u64) -> $name { + use mem; + + let mut base = [0; $n]; + let mut sz = 0; + while v > 0 { + base[sz] = v as $ty; + v >>= mem::size_of::<$ty>() * 8; + sz += 1; + } + $name { size: sz, base: base } + } + + /// Returns true if the bignum is zero. + pub fn is_zero(&self) -> bool { + self.base[..self.size].iter().all(|&v| v == 0) + } + + /// Adds `other` to itself and returns its own mutable reference. + pub fn add<'a>(&'a mut self, other: &$name) -> &'a mut $name { + use cmp; + use num::flt2dec::bignum::FullOps; + + let mut sz = cmp::max(self.size, other.size); + let mut carry = false; + for (a, b) in self.base[..sz].iter_mut().zip(other.base[..sz].iter()) { + let (c, v) = (*a).full_add(*b, carry); + *a = v; + carry = c; + } + if carry { + self.base[sz] = 1; + sz += 1; + } + self.size = sz; + self + } + + /// Subtracts `other` from itself and returns its own mutable reference. + pub fn sub<'a>(&'a mut self, other: &$name) -> &'a mut $name { + use cmp; + use num::flt2dec::bignum::FullOps; + + let sz = cmp::max(self.size, other.size); + let mut noborrow = true; + for (a, b) in self.base[..sz].iter_mut().zip(other.base[..sz].iter()) { + let (c, v) = (*a).full_add(!*b, noborrow); + *a = v; + noborrow = c; + } + assert!(noborrow); + self.size = sz; + self + } + + /// Multiplies itself by a digit-sized `other` and returns its own + /// mutable reference. + pub fn mul_small<'a>(&'a mut self, other: $ty) -> &'a mut $name { + use num::flt2dec::bignum::FullOps; + + let mut sz = self.size; + let mut carry = 0; + for a in self.base[..sz].iter_mut() { + let (c, v) = (*a).full_mul(other, carry); + *a = v; + carry = c; + } + if carry > 0 { + self.base[sz] = carry; + sz += 1; + } + self.size = sz; + self + } + + /// Multiplies itself by `2^bits` and returns its own mutable reference. + pub fn mul_pow2<'a>(&'a mut self, bits: usize) -> &'a mut $name { + use mem; + + let digitbits = mem::size_of::<$ty>() * 8; + let digits = bits / digitbits; + let bits = bits % digitbits; + + assert!(digits < $n); + debug_assert!(self.base[$n-digits..].iter().all(|&v| v == 0)); + debug_assert!(bits == 0 || (self.base[$n-digits-1] >> (digitbits - bits)) == 0); + + // shift by `digits * digitbits` bits + for i in (0..self.size).rev() { + self.base[i+digits] = self.base[i]; + } + for i in 0..digits { + self.base[i] = 0; + } + + // shift by `nbits` bits + let mut sz = self.size + digits; + if bits > 0 { + let last = sz; + let overflow = self.base[last-1] >> (digitbits - bits); + if overflow > 0 { + self.base[last] = overflow; + sz += 1; + } + for i in (digits+1..last).rev() { + self.base[i] = (self.base[i] << bits) | + (self.base[i-1] >> (digitbits - bits)); + } + self.base[digits] <<= bits; + // self.base[..digits] is zero, no need to shift + } + + self.size = sz; + self + } + + /// Multiplies itself by a number described by `other[0] + other[1] * n + + /// other[2] * n^2 + ...` and returns its own mutable reference. + pub fn mul_digits<'a>(&'a mut self, other: &[$ty]) -> &'a mut $name { + // the internal routine. works best when aa.len() <= bb.len(). + fn mul_inner(ret: &mut [$ty; $n], aa: &[$ty], bb: &[$ty]) -> usize { + use num::flt2dec::bignum::FullOps; + + let mut retsz = 0; + for (i, &a) in aa.iter().enumerate() { + if a == 0 { continue; } + let mut sz = bb.len(); + let mut carry = 0; + for (j, &b) in bb.iter().enumerate() { + let (c, v) = a.full_mul_add(b, ret[i + j], carry); + ret[i + j] = v; + carry = c; + } + if carry > 0 { + ret[i + sz] = carry; + sz += 1; + } + if retsz < i + sz { + retsz = i + sz; + } + } + retsz + } + + let mut ret = [0; $n]; + let retsz = if self.size < other.len() { + mul_inner(&mut ret, &self.base[..self.size], other) + } else { + mul_inner(&mut ret, other, &self.base[..self.size]) + }; + self.base = ret; + self.size = retsz; + self + } + + /// Divides itself by a digit-sized `other` and returns its own + /// mutable reference *and* the remainder. + pub fn div_rem_small<'a>(&'a mut self, other: $ty) -> (&'a mut $name, $ty) { + use num::flt2dec::bignum::FullOps; + + assert!(other > 0); + + let sz = self.size; + let mut borrow = 0; + for a in self.base[..sz].iter_mut().rev() { + let (q, r) = (*a).full_div_rem(other, borrow); + *a = q; + borrow = r; + } + (self, borrow) + } + } + + impl ::cmp::PartialEq for $name { + fn eq(&self, other: &$name) -> bool { self.base[..] == other.base[..] } + } + + impl ::cmp::Eq for $name { + } + + impl ::cmp::PartialOrd for $name { + fn partial_cmp(&self, other: &$name) -> ::option::Option<::cmp::Ordering> { + ::option::Option::Some(self.cmp(other)) + } + } + + impl ::cmp::Ord for $name { + fn cmp(&self, other: &$name) -> ::cmp::Ordering { + use cmp::max; + use iter::order; + + let sz = max(self.size, other.size); + let lhs = self.base[..sz].iter().cloned().rev(); + let rhs = other.base[..sz].iter().cloned().rev(); + order::cmp(lhs, rhs) + } + } + + impl ::clone::Clone for $name { + fn clone(&self) -> $name { + $name { size: self.size, base: self.base } + } + } + + impl ::fmt::Debug for $name { + fn fmt(&self, f: &mut ::fmt::Formatter) -> ::fmt::Result { + use mem; + + let sz = if self.size < 1 {1} else {self.size}; + let digitlen = mem::size_of::<$ty>() * 2; + + try!(write!(f, "{:#x}", self.base[sz-1])); + for &v in self.base[..sz-1].iter().rev() { + try!(write!(f, "_{:01$x}", v, digitlen)); + } + ::result::Result::Ok(()) + } + } + ) +} + +/// The digit type for `Big32x36`. +pub type Digit32 = u32; + +define_bignum!(Big32x36: type=Digit32, n=36); + +// this one is used for testing only. +#[doc(hidden)] +pub mod tests { + use prelude::*; + define_bignum!(Big8x3: type=u8, n=3); +} + diff --git a/src/libcore/num/flt2dec/decoder.rs b/src/libcore/num/flt2dec/decoder.rs new file mode 100644 index 00000000000..4b7d00f80e1 --- /dev/null +++ b/src/libcore/num/flt2dec/decoder.rs @@ -0,0 +1,100 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Decodes a floating-point value into individual parts and error ranges. + +use prelude::*; + +use {f32, f64}; +use num::{Float, FpCategory}; + +/// Decoded unsigned finite value, such that: +/// +/// - The original value equals to `mant * 2^exp`. +/// +/// - Any number from `(mant - minus) * 2^exp` to `(mant + plus) * 2^exp` will +/// round to the original value. The range is inclusive only when +/// `inclusive` is true. +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Decoded { + /// The scaled mantissa. + pub mant: u64, + /// The lower error range. + pub minus: u64, + /// The upper error range. + pub plus: u64, + /// The shared exponent in base 2. + pub exp: i16, + /// True when the error range is inclusive. + /// + /// In IEEE 754, this is true when the original mantissa was even. + pub inclusive: bool, +} + +/// Decoded unsigned value. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum FullDecoded { + /// Not-a-number. + Nan, + /// Infinities, either positive or negative. + Infinite, + /// Zero, either positive or negative. + Zero, + /// Finite numbers with further decoded fields. + Finite(Decoded), +} + +/// A floating point type which can be `decode`d. +pub trait DecodableFloat: Float + Copy { + /// The minimum positive normalized value. + fn min_pos_norm_value() -> Self; +} + +impl DecodableFloat for f32 { + fn min_pos_norm_value() -> Self { f32::MIN_POSITIVE } +} + +impl DecodableFloat for f64 { + fn min_pos_norm_value() -> Self { f64::MIN_POSITIVE } +} + +/// Returns a sign (true when negative) and `FullDecoded` value +/// from given floating point number. +pub fn decode(v: T) -> (/*negative?*/ bool, FullDecoded) { + let (mant, exp, sign) = v.integer_decode(); + let even = (mant & 1) == 0; + let decoded = match v.classify() { + FpCategory::Nan => FullDecoded::Nan, + FpCategory::Infinite => FullDecoded::Infinite, + FpCategory::Zero => FullDecoded::Zero, + FpCategory::Subnormal => { + // (mant - 2, exp) -- (mant, exp) -- (mant + 2, exp) + // Float::integer_decode always preserves the exponent, + // so the mantissa is scaled for subnormals. + FullDecoded::Finite(Decoded { mant: mant, minus: 1, plus: 1, + exp: exp, inclusive: even }) + } + FpCategory::Normal => { + let minnorm = ::min_pos_norm_value().integer_decode(); + if mant == minnorm.0 && exp == minnorm.1 { + // (maxmant, exp - 1) -- (minnormmant, exp) -- (minnormmant + 1, exp) + // where maxmant = minnormmant * 2 - 1 + FullDecoded::Finite(Decoded { mant: mant << 1, minus: 1, plus: 2, + exp: exp - 1, inclusive: even }) + } else { + // (mant - 1, exp) -- (mant, exp) -- (mant + 1, exp) + FullDecoded::Finite(Decoded { mant: mant << 1, minus: 1, plus: 1, + exp: exp - 1, inclusive: even }) + } + } + }; + (sign < 0, decoded) +} + diff --git a/src/libcore/num/flt2dec/estimator.rs b/src/libcore/num/flt2dec/estimator.rs new file mode 100644 index 00000000000..d3e06e406db --- /dev/null +++ b/src/libcore/num/flt2dec/estimator.rs @@ -0,0 +1,23 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! The exponent estimator. + +/// Finds `k_0` such that `10^(k_0-1) < mant * 2^exp <= 10^(k_0+1)`. +/// +/// This is used to approximate `k = ceil(log_10 (mant * 2^exp))`; +/// the true `k` is either `k_0` or `k_0+1`. +#[doc(hidden)] +pub fn estimate_scaling_factor(mant: u64, exp: i16) -> i16 { + // 2^(nbits-1) < mant <= 2^nbits if mant > 0 + let nbits = 64 - (mant - 1).leading_zeros() as i64; + (((nbits + exp as i64) * 1292913986) >> 32) as i16 +} + diff --git a/src/libcore/num/flt2dec/mod.rs b/src/libcore/num/flt2dec/mod.rs new file mode 100644 index 00000000000..8b4b3a45e9b --- /dev/null +++ b/src/libcore/num/flt2dec/mod.rs @@ -0,0 +1,662 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +Floating-point number to decimal conversion routines. + +# Problem statement + +We are given the floating-point number `v = f * 2^e` with an integer `f`, +and its bounds `minus` and `plus` such that any number between `v - minus` and +`v + plus` will be rounded to `v`. For the simplicity we assume that +this range is exclusive. Then we would like to get the unique decimal +representation `V = 0.d[0..n-1] * 10^k` such that: + +- `d[0]` is non-zero. + +- It's correctly rounded when parsed back: `v - minus < V < v + plus`. + Furthermore it is shortest such one, i.e. there is no representation + with less than `n` digits that is correctly rounded. + +- It's closest to the original value: `abs(V - v) <= 10^(k-n) / 2`. Note that + there might be two representations satisfying this uniqueness requirement, + in which case some tie-breaking mechanism is used. + +We will call this mode of operation as to the *shortest* mode. This mode is used +when there is no additional constraint, and can be thought as a "natural" mode +as it matches the ordinary intuition (it at least prints `0.1f32` as "0.1"). + +We have two more modes of operation closely related to each other. In these modes +we are given either the number of significant digits `n` or the last-digit +limitation `limit` (which determines the actual `n`), and we would like to get +the representation `V = 0.d[0..n-1] * 10^k` such that: + +- `d[0]` is non-zero, unless `n` was zero in which case only `k` is returned. + +- It's closest to the original value: `abs(V - v) <= 10^(k-n) / 2`. Again, + there might be some tie-breaking mechanism. + +When `limit` is given but not `n`, we set `n` such that `k - n = limit` +so that the last digit `d[n-1]` is scaled by `10^(k-n) = 10^limit`. +If such `n` is negative, we clip it to zero so that we will only get `k`. +We are also limited by the supplied buffer. This limitation is used to print +the number up to given number of fractional digits without knowing +the correct `k` beforehand. + +We will call the mode of operation requiring `n` as to the *exact* mode, +and one requiring `limit` as to the *fixed* mode. The exact mode is a subset of +the fixed mode: the sufficiently large last-digit limitation will eventually fill +the supplied buffer and let the algorithm to return. + +# Implementation overview + +It is easy to get the floating point printing correct but slow (Russ Cox has +[demonstrated](http://research.swtch.com/ftoa) how it's easy), or incorrect but +fast (naïve division and modulo). But it is surprisingly hard to print +floating point numbers correctly *and* efficiently. + +There are two classes of algorithms widely known to be correct. + +- The "Dragon" family of algorithm is first described by Guy L. Steele Jr. and + Jon L. White. They rely on the fixed-size big integer for their correctness. + A slight improvement was found later, which is posthumously described by + Robert G. Burger and R. Kent Dybvig. David Gay's `dtoa.c` routine is + a popular implementation of this strategy. + +- The "Grisu" family of algorithm is first described by Florian Loitsch. + They use very cheap integer-only procedure to determine the close-to-correct + representation which is at least guaranteed to be shortest. The variant, + Grisu3, actively detects if the resulting representation is incorrect. + +We implement both algorithms with necessary tweaks to suit our requirements. +In particular, published literatures are short of the actual implementation +difficulties like how to avoid arithmetic overflows. Each implementation, +available in `strategy::dragon` and `strategy::grisu` respectively, +extensively describes all necessary justifications and many proofs for them. +(It is still difficult to follow though. You have been warned.) + +Both implementations expose two public functions: + +- `format_shortest(decoded, buf)`, which always needs at least + `MAX_SIG_DIGITS` digits of buffer. Implements the shortest mode. + +- `format_exact(decoded, buf, limit)`, which accepts as small as + one digit of buffer. Implements exact and fixed modes. + +They try to fill the `u8` buffer with digits and returns the number of digits +written and the exponent `k`. They are total for all finite `f32` and `f64` +inputs (Grisu internally falls back to Dragon if possible). + +The rendered digits are formatted into the actual string form with +four functions: + +- `to_shortest_str` prints the shortest representation, which can be padded by + zeroes to make *at least* given number of fractional digits. + +- `to_shortest_exp_str` prints the shortest representation, which can be + padded by zeroes when its exponent is in the specified ranges, + or can be printed in the exponential form such as `1.23e45`. + +- `to_exact_exp_str` prints the exact representation with given number of + digits in the exponential form. + +- `to_exact_fixed_str` prints the fixed representation with *exactly* + given number of fractional digits. + +They all return a slice of preallocated `Part` array, which corresponds to +the individual part of strings: a fixed string, a part of rendered digits, +a number of zeroes or a small (`u16`) number. The caller is expected to +provide an enough buffer and `Part` array, and to assemble the final +string from parts itself. + +All algorithms and formatting functions are accompanied by extensive tests +in `coretest::num::flt2dec` module. It also shows how to use individual +functions. + +*/ + +// while this is extensively documented, this is in principle private which is +// only made public for testing. do not expose us. +#![doc(hidden)] + +use prelude::*; +use i16; +use num::Float; +use slice::bytes; +pub use self::decoder::{decode, DecodableFloat, FullDecoded, Decoded}; + +pub mod estimator; +pub mod bignum; +pub mod decoder; + +/// Digit-generation algorithms. +pub mod strategy { + pub mod dragon; + pub mod grisu; +} + +/// The minimum size of buffer necessary for the shortest mode. +/// +/// It is a bit non-trivial to derive, but this is one plus the maximal number of +/// significant decimal digits from formatting algorithms with the shortest result. +/// The exact formula is `ceil(# bits in mantissa * log_10 2 + 1)`. +pub const MAX_SIG_DIGITS: usize = 17; + +/// When `d[..n]` contains decimal digits, increase the last digit and propagate carry. +/// Returns a next digit when it causes the length change. +#[doc(hidden)] +pub fn round_up(d: &mut [u8], n: usize) -> Option { + match d[..n].iter().rposition(|&c| c != b'9') { + Some(i) => { // d[i+1..n] is all nines + d[i] += 1; + for j in i+1..n { d[j] = b'0'; } + None + } + None if n > 0 => { // 999..999 rounds to 1000..000 with an increased exponent + d[0] = b'1'; + for j in 1..n { d[j] = b'0'; } + Some(b'0') + } + None => { // an empty buffer rounds up (a bit strange but reasonable) + Some(b'1') + } + } +} + +/// Formatted parts. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Part<'a> { + /// Given number of zero digits. + Zero(usize), + /// A literal number up to 5 digits. + Num(u16), + /// A verbatim copy of given bytes. + Copy(&'a [u8]), +} + +impl<'a> Part<'a> { + /// Returns the exact byte length of given part. + pub fn len(&self) -> usize { + match *self { + Part::Zero(nzeroes) => nzeroes, + Part::Num(v) => if v < 1_000 { if v < 10 { 1 } else if v < 100 { 2 } else { 3 } } + else { if v < 10_000 { 4 } else { 5 } }, + Part::Copy(buf) => buf.len(), + } + } + + /// Writes a part into the supplied buffer. + /// Returns the number of written bytes, or `None` if the buffer is not enough. + /// (It may still leave partially written bytes in the buffer; do not rely on that.) + pub fn write(&self, out: &mut [u8]) -> Option { + let len = self.len(); + if out.len() >= len { + match *self { + Part::Zero(nzeroes) => { + for c in &mut out[..nzeroes] { *c = b'0'; } + } + Part::Num(mut v) => { + for c in out[..len].iter_mut().rev() { + *c = b'0' + (v % 10) as u8; + v /= 10; + } + } + Part::Copy(buf) => { + bytes::copy_memory(buf, out); + } + } + Some(len) + } else { + None + } + } +} + +/// Formatted result containing one or more parts. +/// This can be written to the byte buffer or converted to the allocated string. +#[derive(Clone)] +pub struct Formatted<'a> { + /// A byte slice representing a sign, either `""`, `"-"` or `"+"`. + pub sign: &'static [u8], + /// Formatted parts to be rendered after a sign and optional zero padding. + pub parts: &'a [Part<'a>], +} + +impl<'a> Formatted<'a> { + /// Returns the exact byte length of combined formatted result. + pub fn len(&self) -> usize { + let mut len = self.sign.len(); + for part in self.parts { + len += part.len(); + } + len + } + + /// Writes all formatted parts into the supplied buffer. + /// Returns the number of written bytes, or `None` if the buffer is not enough. + /// (It may still leave partially written bytes in the buffer; do not rely on that.) + pub fn write(&self, out: &mut [u8]) -> Option { + if out.len() < self.sign.len() { return None; } + bytes::copy_memory(self.sign, out); + + let mut written = self.sign.len(); + for part in self.parts { + match part.write(&mut out[written..]) { + Some(len) => { written += len; } + None => { return None; } + } + } + Some(written) + } +} + +/// Formats given decimal digits `0.<...buf...> * 10^exp` into the decimal form +/// with at least given number of fractional digits. The result is stored to +/// the supplied parts array and a slice of written parts is returned. +/// +/// `frac_digits` can be less than the number of actual fractional digits in `buf`; +/// it will be ignored and full digits will be printed. It is only used to print +/// additional zeroes after rendered digits. Thus `frac_digits` of 0 means that +/// it will only print given digits and nothing else. +fn digits_to_dec_str<'a>(buf: &'a [u8], exp: i16, frac_digits: usize, + parts: &'a mut [Part<'a>]) -> &'a [Part<'a>] { + assert!(!buf.is_empty()); + assert!(buf[0] > b'0'); + assert!(parts.len() >= 4); + + // if there is the restriction on the last digit position, `buf` is assumed to be + // left-padded with the virtual zeroes. the number of virtual zeroes, `nzeroes`, + // equals to `max(0, exp + frag_digits - buf.len())`, so that the position of + // the last digit `exp - buf.len() - nzeroes` is no more than `-frac_digits`: + // + // |<-virtual->| + // |<---- buf ---->| zeroes | exp + // 0. 1 2 3 4 5 6 7 8 9 _ _ _ _ _ _ x 10 + // | | | + // 10^exp 10^(exp-buf.len()) 10^(exp-buf.len()-nzeroes) + // + // `nzeroes` is individually calculated for each case in order to avoid overflow. + + if exp <= 0 { + // the decimal point is before rendered digits: [0.][000...000][1234][____] + let minus_exp = -(exp as i32) as usize; + parts[0] = Part::Copy(b"0."); + parts[1] = Part::Zero(minus_exp); + parts[2] = Part::Copy(buf); + if frac_digits > buf.len() && frac_digits - buf.len() > minus_exp { + parts[3] = Part::Zero((frac_digits - buf.len()) - minus_exp); + &parts[..4] + } else { + &parts[..3] + } + } else { + let exp = exp as usize; + if exp < buf.len() { + // the decimal point is inside rendered digits: [12][.][34][____] + parts[0] = Part::Copy(&buf[..exp]); + parts[1] = Part::Copy(b"."); + parts[2] = Part::Copy(&buf[exp..]); + if frac_digits > buf.len() - exp { + parts[3] = Part::Zero(frac_digits - (buf.len() - exp)); + &parts[..4] + } else { + &parts[..3] + } + } else { + // the decimal point is after rendered digits: [1234][____0000] or [1234][__][.][__]. + parts[0] = Part::Copy(buf); + parts[1] = Part::Zero(exp - buf.len()); + if frac_digits > 0 { + parts[2] = Part::Copy(b"."); + parts[3] = Part::Zero(frac_digits); + &parts[..4] + } else { + &parts[..2] + } + } + } +} + +/// Formats given decimal digits `0.<...buf...> * 10^exp` into the exponential form +/// with at least given number of significant digits. When `upper` is true, +/// the exponent will be prefixed by `E`; otherwise that's `e`. The result is +/// stored to the supplied parts array and a slice of written parts is returned. +/// +/// `min_digits` can be less than the number of actual significant digits in `buf`; +/// it will be ignored and full digits will be printed. It is only used to print +/// additional zeroes after rendered digits. Thus `min_digits` of 0 means that +/// it will only print given digits and nothing else. +fn digits_to_exp_str<'a>(buf: &'a [u8], exp: i16, min_ndigits: usize, upper: bool, + parts: &'a mut [Part<'a>]) -> &'a [Part<'a>] { + assert!(!buf.is_empty()); + assert!(buf[0] > b'0'); + assert!(parts.len() >= 6); + + let mut n = 0; + + parts[n] = Part::Copy(&buf[..1]); + n += 1; + + if buf.len() > 1 || min_ndigits > 1 { + parts[n] = Part::Copy(b"."); + parts[n + 1] = Part::Copy(&buf[1..]); + n += 2; + if min_ndigits > buf.len() { + parts[n] = Part::Zero(min_ndigits - buf.len()); + n += 1; + } + } + + // 0.1234 x 10^exp = 1.234 x 10^(exp-1) + let exp = exp as i32 - 1; // avoid underflow when exp is i16::MIN + if exp < 0 { + parts[n] = Part::Copy(if upper { b"E-" } else { b"e-" }); + parts[n + 1] = Part::Num(-exp as u16); + } else { + parts[n] = Part::Copy(if upper { b"E" } else { b"e" }); + parts[n + 1] = Part::Num(exp as u16); + } + &parts[..n + 2] +} + +/// Sign formatting options. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Sign { + /// Prints `-` only for the negative non-zero values. + Minus, // -inf -1 0 0 1 inf nan + /// Prints `-` only for any negative values (including the negative zero). + MinusRaw, // -inf -1 0 0 1 inf nan + /// Prints `-` for the negative non-zero values, or `+` otherwise. + MinusPlus, // -inf -1 +0 +0 +1 +inf nan + /// Prints `-` for any negative values (including the negative zero), or `+` otherwise. + MinusPlusRaw, // -inf -1 -0 +0 +1 +inf nan +} + +/// Returns the static byte string corresponding to the sign to be formatted. +/// It can be either `b""`, `b"+"` or `b"-"`. +fn determine_sign(sign: Sign, decoded: &FullDecoded, negative: bool) -> &'static [u8] { + match (*decoded, sign) { + (FullDecoded::Nan, _) => b"", + (FullDecoded::Zero, Sign::Minus) => b"", + (FullDecoded::Zero, Sign::MinusRaw) => if negative { b"-" } else { b"" }, + (FullDecoded::Zero, Sign::MinusPlus) => b"+", + (FullDecoded::Zero, Sign::MinusPlusRaw) => if negative { b"-" } else { b"+" }, + (_, Sign::Minus) | (_, Sign::MinusRaw) => if negative { b"-" } else { b"" }, + (_, Sign::MinusPlus) | (_, Sign::MinusPlusRaw) => if negative { b"-" } else { b"+" }, + } +} + +/// Formats given floating point number into the decimal form with at least +/// given number of fractional digits. The result is stored to the supplied parts +/// array while utilizing given byte buffer as a scratch. `upper` is only used to +/// determine the case of non-finite values, i.e. `inf` and `nan`. The first part +/// to be rendered is always a `Part::Sign` (which can be an empty string +/// if no sign is rendered). +/// +/// `format_shortest` should be the underlying digit-generation function. +/// You probably would want `strategy::grisu::format_shortest` for this. +/// +/// `frac_digits` can be less than the number of actual fractional digits in `v`; +/// it will be ignored and full digits will be printed. It is only used to print +/// additional zeroes after rendered digits. Thus `frac_digits` of 0 means that +/// it will only print given digits and nothing else. +/// +/// The byte buffer should be at least `MAX_SIG_DIGITS` bytes long. +/// There should be at least 5 parts available, due to the worst case like +/// `[+][0.][0000][45][0000]` with `frac_digits = 10`. +pub fn to_shortest_str<'a, T, F>(mut format_shortest: F, v: T, + sign: Sign, frac_digits: usize, upper: bool, + buf: &'a mut [u8], parts: &'a mut [Part<'a>]) -> Formatted<'a> + where T: DecodableFloat, F: FnMut(&Decoded, &mut [u8]) -> (usize, i16) { + assert!(parts.len() >= 4); + assert!(buf.len() >= MAX_SIG_DIGITS); + + let (negative, full_decoded) = decode(v); + let sign = determine_sign(sign, &full_decoded, negative); + match full_decoded { + FullDecoded::Nan => { + parts[0] = Part::Copy(if upper { b"NAN" } else { b"nan" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Infinite => { + parts[0] = Part::Copy(if upper { b"INF" } else { b"inf" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Zero => { + if frac_digits > 0 { // [0.][0000] + parts[0] = Part::Copy(b"0."); + parts[1] = Part::Zero(frac_digits); + Formatted { sign: sign, parts: &parts[..2] } + } else { + parts[0] = Part::Copy(b"0"); + Formatted { sign: sign, parts: &parts[..1] } + } + } + FullDecoded::Finite(ref decoded) => { + let (len, exp) = format_shortest(decoded, buf); + Formatted { sign: sign, + parts: digits_to_dec_str(&buf[..len], exp, frac_digits, parts) } + } + } +} + +/// Formats given floating point number into the decimal form or +/// the exponential form, depending on the resulting exponent. The result is +/// stored to the supplied parts array while utilizing given byte buffer +/// as a scratch. `upper` is used to determine the case of non-finite values +/// (`inf` and `nan`) or the case of the exponent prefix (`e` or `E`). +/// The first part to be rendered is always a `Part::Sign` (which can be +/// an empty string if no sign is rendered). +/// +/// `format_shortest` should be the underlying digit-generation function. +/// You probably would want `strategy::grisu::format_shortest` for this. +/// +/// The `dec_bounds` is a tuple `(lo, hi)` such that the number is formatted +/// as decimal only when `10^lo <= V < 10^hi`. Note that this is the *apparant* `V` +/// instead of the actual `v`! Thus any printed exponent in the exponential form +/// cannot be in this range, avoiding any confusion. +/// +/// The byte buffer should be at least `MAX_SIG_DIGITS` bytes long. +/// There should be at least 7 parts available, due to the worst case like +/// `[+][1][.][2345][e][-][67]`. +pub fn to_shortest_exp_str<'a, T, F>(mut format_shortest: F, v: T, + sign: Sign, dec_bounds: (i16, i16), upper: bool, + buf: &'a mut [u8], parts: &'a mut [Part<'a>]) -> Formatted<'a> + where T: DecodableFloat, F: FnMut(&Decoded, &mut [u8]) -> (usize, i16) { + assert!(parts.len() >= 6); + assert!(buf.len() >= MAX_SIG_DIGITS); + assert!(dec_bounds.0 <= dec_bounds.1); + + let (negative, full_decoded) = decode(v); + let sign = determine_sign(sign, &full_decoded, negative); + match full_decoded { + FullDecoded::Nan => { + parts[0] = Part::Copy(if upper { b"NAN" } else { b"nan" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Infinite => { + parts[0] = Part::Copy(if upper { b"INF" } else { b"inf" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Zero => { + parts[0] = if dec_bounds.0 <= 0 && 0 < dec_bounds.1 { + Part::Copy(b"0") + } else { + Part::Copy(if upper { b"0E0" } else { b"0e0" }) + }; + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Finite(ref decoded) => { + let (len, exp) = format_shortest(decoded, buf); + let vis_exp = exp as i32 - 1; + let parts = if dec_bounds.0 as i32 <= vis_exp && vis_exp < dec_bounds.1 as i32 { + digits_to_dec_str(&buf[..len], exp, 0, parts) + } else { + digits_to_exp_str(&buf[..len], exp, 0, upper, parts) + }; + Formatted { sign: sign, parts: parts } + } + } +} + +/// Returns rather crude approximation (upper bound) for the maximum buffer size +/// calculated from the given decoded exponent. +/// +/// The exact limit is: +/// +/// - when `exp < 0`, the maximum length is `ceil(log_10 (5^-exp * (2^64 - 1)))`. +/// - when `exp >= 0`, the maximum length is `ceil(log_10 (2^exp * (2^64 - 1)))`. +/// +/// `ceil(log_10 (x^exp * (2^64 - 1)))` is less than `ceil(log_10 (2^64 - 1)) + +/// ceil(exp * log_10 x)`, which is in turn less than `20 + (1 + exp * log_10 x)`. +/// We use the facts that `log_10 2 < 5/16` and `log_10 5 < 12/16`, which is +/// enough for our purposes. +/// +/// Why do we need this? `format_exact` functions will fill the entire buffer +/// unless limited by the last digit restriction, but it is possible that +/// the number of digits requested is ridiculously large (say, 30,000 digits). +/// The vast majority of buffer will be filled with zeroes, so we don't want to +/// allocate all the buffer beforehand. Consequently, for any given arguments, +/// 826 bytes of buffer should be sufficient for `f64`. Compare this with +/// the actual number for the worst case: 770 bytes (when `exp = -1074`). +fn estimate_max_buf_len(exp: i16) -> usize { + 21 + ((if exp < 0 { -12 } else { 5 } * exp as i32) as usize >> 4) +} + +/// Formats given floating point number into the exponential form with +/// exactly given number of significant digits. The result is stored to +/// the supplied parts array while utilizing given byte buffer as a scratch. +/// `upper` is used to determine the case of the exponent prefix (`e` or `E`). +/// The first part to be rendered is always a `Part::Sign` (which can be +/// an empty string if no sign is rendered). +/// +/// `format_exact` should be the underlying digit-generation function. +/// You probably would want `strategy::grisu::format_exact` for this. +/// +/// The byte buffer should be at least `ndigits` bytes long unless `ndigits` is +/// so large that only the fixed number of digits will be ever written. +/// (The tipping point for `f64` is about 800, so 1000 bytes should be enough.) +/// There should be at least 7 parts available, due to the worst case like +/// `[+][1][.][2345][e][-][67]`. +pub fn to_exact_exp_str<'a, T, F>(mut format_exact: F, v: T, + sign: Sign, ndigits: usize, upper: bool, + buf: &'a mut [u8], parts: &'a mut [Part<'a>]) -> Formatted<'a> + where T: DecodableFloat, F: FnMut(&Decoded, &mut [u8], i16) -> (usize, i16) { + assert!(parts.len() >= 6); + assert!(ndigits > 0); + + let (negative, full_decoded) = decode(v); + let sign = determine_sign(sign, &full_decoded, negative); + match full_decoded { + FullDecoded::Nan => { + parts[0] = Part::Copy(if upper { b"NAN" } else { b"nan" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Infinite => { + parts[0] = Part::Copy(if upper { b"INF" } else { b"inf" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Zero => { + if ndigits > 1 { // [0.][0000][e0] + parts[0] = Part::Copy(b"0."); + parts[1] = Part::Zero(ndigits - 1); + parts[2] = Part::Copy(if upper { b"E0" } else { b"e0" }); + Formatted { sign: sign, parts: &parts[..3] } + } else { + parts[0] = Part::Copy(if upper { b"0E0" } else { b"0e0" }); + Formatted { sign: sign, parts: &parts[..1] } + } + } + FullDecoded::Finite(ref decoded) => { + let maxlen = estimate_max_buf_len(decoded.exp); + assert!(buf.len() >= ndigits || buf.len() >= maxlen); + + let trunc = if ndigits < maxlen { ndigits } else { maxlen }; + let (len, exp) = format_exact(decoded, &mut buf[..trunc], i16::MIN); + Formatted { sign: sign, + parts: digits_to_exp_str(&buf[..len], exp, ndigits, upper, parts) } + } + } +} + +/// Formats given floating point number into the decimal form with exactly +/// given number of fractional digits. The result is stored to the supplied parts +/// array while utilizing given byte buffer as a scratch. `upper` is only used to +/// determine the case of non-finite values, i.e. `inf` and `nan`. The first part +/// to be rendered is always a `Part::Sign` (which can be an empty string +/// if no sign is rendered). +/// +/// `format_exact` should be the underlying digit-generation function. +/// You probably would want `strategy::grisu::format_exact` for this. +/// +/// The byte buffer should be enough for the output unless `frac_digits` is +/// so large that only the fixed number of digits will be ever written. +/// (The tipping point for `f64` is about 800, and 1000 bytes should be enough.) +/// There should be at least 5 parts available, due to the worst case like +/// `[+][0.][0000][45][0000]` with `frac_digits = 10`. +pub fn to_exact_fixed_str<'a, T, F>(mut format_exact: F, v: T, + sign: Sign, frac_digits: usize, upper: bool, + buf: &'a mut [u8], parts: &'a mut [Part<'a>]) -> Formatted<'a> + where T: DecodableFloat, F: FnMut(&Decoded, &mut [u8], i16) -> (usize, i16) { + assert!(parts.len() >= 4); + + let (negative, full_decoded) = decode(v); + let sign = determine_sign(sign, &full_decoded, negative); + match full_decoded { + FullDecoded::Nan => { + parts[0] = Part::Copy(if upper { b"NAN" } else { b"nan" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Infinite => { + parts[0] = Part::Copy(if upper { b"INF" } else { b"inf" }); + Formatted { sign: sign, parts: &parts[..1] } + } + FullDecoded::Zero => { + if frac_digits > 0 { // [0.][0000] + parts[0] = Part::Copy(b"0."); + parts[1] = Part::Zero(frac_digits); + Formatted { sign: sign, parts: &parts[..2] } + } else { + parts[0] = Part::Copy(b"0"); + Formatted { sign: sign, parts: &parts[..1] } + } + } + FullDecoded::Finite(ref decoded) => { + let maxlen = estimate_max_buf_len(decoded.exp); + assert!(buf.len() >= maxlen); + + // it *is* possible that `frac_digits` is ridiculously large. + // `format_exact` will end rendering digits much earlier in this case, + // because we are strictly limited by `maxlen`. + let limit = if frac_digits < 0x8000 { -(frac_digits as i16) } else { i16::MIN }; + let (len, exp) = format_exact(decoded, &mut buf[..maxlen], limit); + if exp <= limit { + // `format_exact` always returns at least one digit even though the restriction + // hasn't been met, so we catch this condition and treats as like zeroes. + // this does not include the case that the restriction has been met + // only after the final rounding-up; it's a regular case with `exp = limit + 1`. + debug_assert_eq!(len, 0); + if frac_digits > 0 { // [0.][0000] + parts[0] = Part::Copy(b"0."); + parts[1] = Part::Zero(frac_digits); + Formatted { sign: sign, parts: &parts[..2] } + } else { + parts[0] = Part::Copy(b"0"); + Formatted { sign: sign, parts: &parts[..1] } + } + } else { + Formatted { sign: sign, + parts: digits_to_dec_str(&buf[..len], exp, frac_digits, parts) } + } + } + } +} + diff --git a/src/libcore/num/flt2dec/strategy/dragon.rs b/src/libcore/num/flt2dec/strategy/dragon.rs new file mode 100644 index 00000000000..bd735594e82 --- /dev/null +++ b/src/libcore/num/flt2dec/strategy/dragon.rs @@ -0,0 +1,327 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! +Almost direct (but slightly optimized) Rust translation of Figure 3 of [1]. + +[1] Burger, R. G. and Dybvig, R. K. 1996. Printing floating-point numbers + quickly and accurately. SIGPLAN Not. 31, 5 (May. 1996), 108-116. +*/ + +use prelude::*; +use num::Float; +use cmp::Ordering; + +use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up}; +use num::flt2dec::estimator::estimate_scaling_factor; +use num::flt2dec::bignum::Digit32 as Digit; +use num::flt2dec::bignum::Big32x36 as Big; + +// FIXME(#22540) const ref to static array seems to ICE +static POW10: [Digit; 10] = [1, 10, 100, 1000, 10000, 100000, + 1000000, 10000000, 100000000, 1000000000]; +static TWOPOW10: [Digit; 10] = [2, 20, 200, 2000, 20000, 200000, + 2000000, 20000000, 200000000, 2000000000]; + +// precalculated arrays of `Digit`s for 10^(2^n) +static POW10TO16: [Digit; 2] = [0x6fc10000, 0x2386f2]; +static POW10TO32: [Digit; 4] = [0, 0x85acef81, 0x2d6d415b, 0x4ee]; +static POW10TO64: [Digit; 7] = [0, 0, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, 0xe93ff9f4, 0x184f03]; +static POW10TO128: [Digit; 14] = + [0, 0, 0, 0, 0x2e953e01, 0x3df9909, 0xf1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08, + 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x24e]; +static POW10TO256: [Digit; 27] = + [0, 0, 0, 0, 0, 0, 0, 0, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, + 0xcf4a6e70, 0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, + 0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x553f7]; + +#[doc(hidden)] +pub fn mul_pow10<'a>(x: &'a mut Big, n: usize) -> &'a mut Big { + debug_assert!(n < 512); + if n & 7 != 0 { x.mul_small(POW10[n & 7]); } + if n & 8 != 0 { x.mul_small(POW10[8]); } + if n & 16 != 0 { x.mul_digits(&POW10TO16); } + if n & 32 != 0 { x.mul_digits(&POW10TO32); } + if n & 64 != 0 { x.mul_digits(&POW10TO64); } + if n & 128 != 0 { x.mul_digits(&POW10TO128); } + if n & 256 != 0 { x.mul_digits(&POW10TO256); } + x +} + +fn div_2pow10<'a>(x: &'a mut Big, mut n: usize) -> &'a mut Big { + let largest = POW10.len() - 1; + while n > largest { + x.div_rem_small(POW10[largest]); + n -= largest; + } + x.div_rem_small(TWOPOW10[n]); + x +} + +// only usable when `x < 16 * scale`; `scaleN` should be `scale.mul_small(N)` +fn div_rem_upto_16<'a>(x: &'a mut Big, scale: &Big, + scale2: &Big, scale4: &Big, scale8: &Big) -> (u8, &'a mut Big) { + let mut d = 0; + if *x >= *scale8 { x.sub(scale8); d += 8; } + if *x >= *scale4 { x.sub(scale4); d += 4; } + if *x >= *scale2 { x.sub(scale2); d += 2; } + if *x >= *scale { x.sub(scale); d += 1; } + debug_assert!(*x < *scale); + (d, x) +} + +/// The shortest mode implementation for Dragon. +pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) { + // the number `v` to format is known to be: + // - equal to `mant * 2^exp`; + // - preceded by `(mant - 2 * minus) * 2^exp` in the original type; and + // - followed by `(mant + 2 * plus) * 2^exp` in the original type. + // + // obviously, `minus` and `plus` cannot be zero. (for infinities, we use out-of-range values.) + // also we assume that at least one digit is generated, i.e. `mant` cannot be zero too. + // + // this also means that any number between `low = (mant - minus) * 2^exp` and + // `high = (mant + plus) * 2^exp` will map to this exact floating point number, + // with bounds included when the original mantissa was even (i.e. `!mant_was_odd`). + + assert!(d.mant > 0); + assert!(d.minus > 0); + assert!(d.plus > 0); + assert!(d.mant.checked_add(d.plus).is_some()); + assert!(d.mant.checked_sub(d.minus).is_some()); + assert!(buf.len() >= MAX_SIG_DIGITS); + + // `a.cmp(&b) < rounding` is `if d.inclusive {a <= b} else {a < b}` + let rounding = if d.inclusive {Ordering::Greater} else {Ordering::Equal}; + + // estimate `k_0` from original inputs satisfying `10^(k_0-1) < high <= 10^(k_0+1)`. + // the tight bound `k` satisfying `10^(k-1) < high <= 10^k` is calculated later. + let mut k = estimate_scaling_factor(d.mant + d.plus, d.exp); + + // convert `{mant, plus, minus} * 2^exp` into the fractional form so that: + // - `v = mant / scale` + // - `low = (mant - minus) / scale` + // - `high = (mant + plus) / scale` + let mut mant = Big::from_u64(d.mant); + let mut minus = Big::from_u64(d.minus); + let mut plus = Big::from_u64(d.plus); + let mut scale = Big::from_small(1); + if d.exp < 0 { + scale.mul_pow2(-d.exp as usize); + } else { + mant.mul_pow2(d.exp as usize); + minus.mul_pow2(d.exp as usize); + plus.mul_pow2(d.exp as usize); + } + + // divide `mant` by `10^k`. now `scale / 10 < mant + plus <= scale * 10`. + if k >= 0 { + mul_pow10(&mut scale, k as usize); + } else { + mul_pow10(&mut mant, -k as usize); + mul_pow10(&mut minus, -k as usize); + mul_pow10(&mut plus, -k as usize); + } + + // fixup when `mant + plus > scale` (or `>=`). + // we are not actually modifying `scale`, since we can skip the initial multiplication instead. + // now `scale < mant + plus <= scale * 10` and we are ready to generate digits. + // + // note that `d[0]` *can* be zero, when `scale - plus < mant < scale`. + // in this case rounding-up condition (`up` below) will be triggered immediately. + if scale.cmp(mant.clone().add(&plus)) < rounding { + // equivalent to scaling `scale` by 10 + k += 1; + } else { + mant.mul_small(10); + minus.mul_small(10); + plus.mul_small(10); + } + + // cache `(2, 4, 8) * scale` for digit generation. + let mut scale2 = scale.clone(); scale2.mul_pow2(1); + let mut scale4 = scale.clone(); scale4.mul_pow2(2); + let mut scale8 = scale.clone(); scale8.mul_pow2(3); + + let mut down; + let mut up; + let mut i = 0; + loop { + // invariants, where `d[0..n-1]` are digits generated so far: + // - `v = mant / scale * 10^(k-n-1) + d[0..n-1] * 10^(k-n)` + // - `v - low = minus / scale * 10^(k-n-1)` + // - `high - v = plus / scale * 10^(k-n-1)` + // - `(mant + plus) / scale <= 10` (thus `mant / scale < 10`) + // where `d[i..j]` is a shorthand for `d[i] * 10^(j-i) + ... + d[j-1] * 10 + d[j]`. + + // generate one digit: `d[n] = floor(mant / scale) < 10`. + let (d, _) = div_rem_upto_16(&mut mant, &scale, &scale2, &scale4, &scale8); + debug_assert!(d < 10); + buf[i] = b'0' + d; + i += 1; + + // this is a simplified description of the modified Dragon algorithm. + // many intermediate derivations and completeness arguments are omitted for convenience. + // + // start with modified invariants, as we've updated `n`: + // - `v = mant / scale * 10^(k-n) + d[0..n-1] * 10^(k-n)` + // - `v - low = minus / scale * 10^(k-n)` + // - `high - v = plus / scale * 10^(k-n)` + // + // assume that `d[0..n-1]` is the shortest representation between `low` and `high`, + // i.e. `d[0..n-1]` satisfies both of the following but `d[0..n-2]` doesn't: + // - `low < d[0..n-1] * 10^(k-n) < high` (bijectivity: digits round to `v`); and + // - `abs(v / 10^(k-n) - d[0..n-1]) <= 1/2` (the last digit is correct). + // + // the second condition simplifies to `2 * mant <= scale`. + // solving invariants in terms of `mant`, `low` and `high` yields + // a simpler version of the first condition: `-plus < mant < minus`. + // since `-plus < 0 <= mant`, we have the correct shortest representation + // when `mant < minus` and `2 * mant <= scale`. + // (the former becomes `mant <= minus` when the original mantissa is even.) + // + // when the second doesn't hold (`2 * mant > scale`), we need to increase the last digit. + // this is enough for restoring that condition: we already know that + // the digit generation guarantees `0 <= v / 10^(k-n) - d[0..n-1] < 1`. + // in this case, the first condition becomes `-plus < mant - scale < minus`. + // since `mant < scale` after the generation, we have `scale < mant + plus`. + // (again, this becomes `scale <= mant + plus` when the original mantissa is even.) + // + // in short: + // - stop and round `down` (keep digits as is) when `mant < minus` (or `<=`). + // - stop and round `up` (increase the last digit) when `scale < mant + plus` (or `<=`). + // - keep generating otherwise. + down = mant.cmp(&minus) < rounding; + up = scale.cmp(mant.clone().add(&plus)) < rounding; + if down || up { break; } // we have the shortest representation, proceed to the rounding + + // restore the invariants. + // this makes the algorithm always terminating: `minus` and `plus` always increases, + // but `mant` is clipped modulo `scale` and `scale` is fixed. + mant.mul_small(10); + minus.mul_small(10); + plus.mul_small(10); + } + + // rounding up happens when + // i) only the rounding-up condition was triggered, or + // ii) both conditions were triggered and tie breaking prefers rounding up. + if up && (!down || *mant.mul_pow2(1) >= scale) { + // if rounding up changes the length, the exponent should also change. + // it seems that this condition is very hard to satisfy (possibly impossible), + // but we are just being safe and consistent here. + if let Some(c) = round_up(buf, i) { + buf[i] = c; + i += 1; + k += 1; + } + } + + (i, k) +} + +/// The exact and fixed mode implementation for Dragon. +pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) { + assert!(d.mant > 0); + assert!(d.minus > 0); + assert!(d.plus > 0); + assert!(d.mant.checked_add(d.plus).is_some()); + assert!(d.mant.checked_sub(d.minus).is_some()); + + // estimate `k_0` from original inputs satisfying `10^(k_0-1) < v <= 10^(k_0+1)`. + let mut k = estimate_scaling_factor(d.mant, d.exp); + + // `v = mant / scale`. + let mut mant = Big::from_u64(d.mant); + let mut scale = Big::from_small(1); + if d.exp < 0 { + scale.mul_pow2(-d.exp as usize); + } else { + mant.mul_pow2(d.exp as usize); + } + + // divide `mant` by `10^k`. now `scale / 10 < mant <= scale * 10`. + if k >= 0 { + mul_pow10(&mut scale, k as usize); + } else { + mul_pow10(&mut mant, -k as usize); + } + + // fixup when `mant + plus >= scale`, where `plus / scale = 10^-buf.len() / 2`. + // in order to keep the fixed-size bignum, we actually use `mant + floor(plus) >= scale`. + // we are not actually modifying `scale`, since we can skip the initial multiplication instead. + // again with the shortest algorithm, `d[0]` can be zero but will be eventually rounded up. + if *div_2pow10(&mut scale.clone(), buf.len()).add(&mant) >= scale { + // equivalent to scaling `scale` by 10 + k += 1; + } else { + mant.mul_small(10); + } + + // if we are working with the last-digit limitation, we need to shorten the buffer + // before the actual rendering in order to avoid double rounding. + // note that we have to enlarge the buffer again when rounding up happens! + let mut len = if k < limit { + // oops, we cannot even produce *one* digit. + // this is possible when, say, we've got something like 9.5 and it's being rounded to 10. + // we return an empty buffer, with an exception of the later rounding-up case + // which occurs when `k == limit` and has to produce exactly one digit. + 0 + } else if ((k as i32 - limit as i32) as usize) < buf.len() { + (k - limit) as usize + } else { + buf.len() + }; + + if len > 0 { + // cache `(2, 4, 8) * scale` for digit generation. + // (this can be expensive, so do not calculate them when the buffer is empty.) + let mut scale2 = scale.clone(); scale2.mul_pow2(1); + let mut scale4 = scale.clone(); scale4.mul_pow2(2); + let mut scale8 = scale.clone(); scale8.mul_pow2(3); + + for i in 0..len { + if mant.is_zero() { // following digits are all zeroes, we stop here + // do *not* try to perform rounding! rather, fill remaining digits. + for c in &mut buf[i..len] { *c = b'0'; } + return (len, k); + } + + let mut d = 0; + if mant >= scale8 { mant.sub(&scale8); d += 8; } + if mant >= scale4 { mant.sub(&scale4); d += 4; } + if mant >= scale2 { mant.sub(&scale2); d += 2; } + if mant >= scale { mant.sub(&scale); d += 1; } + debug_assert!(mant < scale); + debug_assert!(d < 10); + buf[i] = b'0' + d; + mant.mul_small(10); + } + } + + // rounding up if we stop in the middle of digits + if mant >= *scale.mul_small(5) { + // if rounding up changes the length, the exponent should also change. + // but we've been requested a fixed number of digits, so do not alter the buffer... + if let Some(c) = round_up(buf, len) { + // ...unless we've been requested the fixed precision instead. + // we also need to check that, if the original buffer was empty, + // the additional digit can only be added when `k == limit` (edge case). + k += 1; + if k > limit && len < buf.len() { + buf[len] = c; + len += 1; + } + } + } + + (len, k) +} + diff --git a/src/libcore/num/flt2dec/strategy/grisu.rs b/src/libcore/num/flt2dec/strategy/grisu.rs new file mode 100644 index 00000000000..220811e9985 --- /dev/null +++ b/src/libcore/num/flt2dec/strategy/grisu.rs @@ -0,0 +1,749 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! +Rust adaptation of Grisu3 algorithm described in [1]. It uses about +1KB of precomputed table, and in turn, it's very quick for most inputs. + +[1] Florian Loitsch. 2010. Printing floating-point numbers quickly and + accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243. +*/ + +use prelude::*; +use num::Float; + +use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up}; + +/// A custom 64-bit floating point type, representing `f * 2^e`. +#[derive(Copy, Clone, Debug)] +#[doc(hidden)] +pub struct Fp { + /// The integer mantissa. + pub f: u64, + /// The exponent in base 2. + pub e: i16, +} + +impl Fp { + /// Returns a correctly rounded product of itself and `other`. + fn mul(&self, other: &Fp) -> Fp { + const MASK: u64 = 0xffffffff; + let a = self.f >> 32; + let b = self.f & MASK; + let c = other.f >> 32; + let d = other.f & MASK; + let ac = a * c; + let bc = b * c; + let ad = a * d; + let bd = b * d; + let tmp = (bd >> 32) + (ad & MASK) + (bc & MASK) + (1 << 31) /* round */; + let f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); + let e = self.e + other.e + 64; + Fp { f: f, e: e } + } + + /// Normalizes itself so that the resulting mantissa is at least `2^63`. + fn normalize(&self) -> Fp { + let mut f = self.f; + let mut e = self.e; + if f >> (64 - 32) == 0 { f <<= 32; e -= 32; } + if f >> (64 - 16) == 0 { f <<= 16; e -= 16; } + if f >> (64 - 8) == 0 { f <<= 8; e -= 8; } + if f >> (64 - 4) == 0 { f <<= 4; e -= 4; } + if f >> (64 - 2) == 0 { f <<= 2; e -= 2; } + if f >> (64 - 1) == 0 { f <<= 1; e -= 1; } + debug_assert!(f >= (1 >> 63)); + Fp { f: f, e: e } + } + + /// Normalizes itself to have the shared exponent. + /// It can only decrease the exponent (and thus increase the mantissa). + fn normalize_to(&self, e: i16) -> Fp { + let edelta = self.e - e; + assert!(edelta >= 0); + let edelta = edelta as usize; + assert_eq!(self.f << edelta >> edelta, self.f); + Fp { f: self.f << edelta, e: e } + } +} + +// see the comments in `format_shortest_opt` for the rationale. +#[doc(hidden)] pub const ALPHA: i16 = -60; +#[doc(hidden)] pub const GAMMA: i16 = -32; + +/* +# the following Python code generates this table: +for i in xrange(-308, 333, 8): + if i >= 0: f = 10**i; e = 0 + else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80 + l = f.bit_length() + f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64 + print ' (%#018x, %5d, %4d),' % (f, e, i) +*/ +// FIXME(#22540) const ref to static array seems to ICE +#[doc(hidden)] +pub static CACHED_POW10: [(u64, i16, i16); 81] = [ // (f, e, k) + (0xe61acf033d1a45df, -1087, -308), + (0xab70fe17c79ac6ca, -1060, -300), + (0xff77b1fcbebcdc4f, -1034, -292), + (0xbe5691ef416bd60c, -1007, -284), + (0x8dd01fad907ffc3c, -980, -276), + (0xd3515c2831559a83, -954, -268), + (0x9d71ac8fada6c9b5, -927, -260), + (0xea9c227723ee8bcb, -901, -252), + (0xaecc49914078536d, -874, -244), + (0x823c12795db6ce57, -847, -236), + (0xc21094364dfb5637, -821, -228), + (0x9096ea6f3848984f, -794, -220), + (0xd77485cb25823ac7, -768, -212), + (0xa086cfcd97bf97f4, -741, -204), + (0xef340a98172aace5, -715, -196), + (0xb23867fb2a35b28e, -688, -188), + (0x84c8d4dfd2c63f3b, -661, -180), + (0xc5dd44271ad3cdba, -635, -172), + (0x936b9fcebb25c996, -608, -164), + (0xdbac6c247d62a584, -582, -156), + (0xa3ab66580d5fdaf6, -555, -148), + (0xf3e2f893dec3f126, -529, -140), + (0xb5b5ada8aaff80b8, -502, -132), + (0x87625f056c7c4a8b, -475, -124), + (0xc9bcff6034c13053, -449, -116), + (0x964e858c91ba2655, -422, -108), + (0xdff9772470297ebd, -396, -100), + (0xa6dfbd9fb8e5b88f, -369, -92), + (0xf8a95fcf88747d94, -343, -84), + (0xb94470938fa89bcf, -316, -76), + (0x8a08f0f8bf0f156b, -289, -68), + (0xcdb02555653131b6, -263, -60), + (0x993fe2c6d07b7fac, -236, -52), + (0xe45c10c42a2b3b06, -210, -44), + (0xaa242499697392d3, -183, -36), + (0xfd87b5f28300ca0e, -157, -28), + (0xbce5086492111aeb, -130, -20), + (0x8cbccc096f5088cc, -103, -12), + (0xd1b71758e219652c, -77, -4), + (0x9c40000000000000, -50, 4), + (0xe8d4a51000000000, -24, 12), + (0xad78ebc5ac620000, 3, 20), + (0x813f3978f8940984, 30, 28), + (0xc097ce7bc90715b3, 56, 36), + (0x8f7e32ce7bea5c70, 83, 44), + (0xd5d238a4abe98068, 109, 52), + (0x9f4f2726179a2245, 136, 60), + (0xed63a231d4c4fb27, 162, 68), + (0xb0de65388cc8ada8, 189, 76), + (0x83c7088e1aab65db, 216, 84), + (0xc45d1df942711d9a, 242, 92), + (0x924d692ca61be758, 269, 100), + (0xda01ee641a708dea, 295, 108), + (0xa26da3999aef774a, 322, 116), + (0xf209787bb47d6b85, 348, 124), + (0xb454e4a179dd1877, 375, 132), + (0x865b86925b9bc5c2, 402, 140), + (0xc83553c5c8965d3d, 428, 148), + (0x952ab45cfa97a0b3, 455, 156), + (0xde469fbd99a05fe3, 481, 164), + (0xa59bc234db398c25, 508, 172), + (0xf6c69a72a3989f5c, 534, 180), + (0xb7dcbf5354e9bece, 561, 188), + (0x88fcf317f22241e2, 588, 196), + (0xcc20ce9bd35c78a5, 614, 204), + (0x98165af37b2153df, 641, 212), + (0xe2a0b5dc971f303a, 667, 220), + (0xa8d9d1535ce3b396, 694, 228), + (0xfb9b7cd9a4a7443c, 720, 236), + (0xbb764c4ca7a44410, 747, 244), + (0x8bab8eefb6409c1a, 774, 252), + (0xd01fef10a657842c, 800, 260), + (0x9b10a4e5e9913129, 827, 268), + (0xe7109bfba19c0c9d, 853, 276), + (0xac2820d9623bf429, 880, 284), + (0x80444b5e7aa7cf85, 907, 292), + (0xbf21e44003acdd2d, 933, 300), + (0x8e679c2f5e44ff8f, 960, 308), + (0xd433179d9c8cb841, 986, 316), + (0x9e19db92b4e31ba9, 1013, 324), + (0xeb96bf6ebadf77d9, 1039, 332), +]; + +#[doc(hidden)] pub const CACHED_POW10_FIRST_E: i16 = -1087; +#[doc(hidden)] pub const CACHED_POW10_LAST_E: i16 = 1039; + +#[doc(hidden)] +pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) { + let offset = CACHED_POW10_FIRST_E as i32; + let range = (CACHED_POW10.len() as i32) - 1; + let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32; + let idx = ((gamma as i32) - offset) * range / domain; + let (f, e, k) = CACHED_POW10[idx as usize]; + debug_assert!(alpha <= e && e <= gamma); + (k, Fp { f: f, e: e }) +} + +/// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`. +#[doc(hidden)] +pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) { + debug_assert!(x > 0); + + const X9: u32 = 10_0000_0000; + const X8: u32 = 1_0000_0000; + const X7: u32 = 1000_0000; + const X6: u32 = 100_0000; + const X5: u32 = 10_0000; + const X4: u32 = 1_0000; + const X3: u32 = 1000; + const X2: u32 = 100; + const X1: u32 = 10; + + if x < X4 { + if x < X2 { if x < X1 {(0, 1)} else {(1, X1)} } + else { if x < X3 {(2, X2)} else {(3, X3)} } + } else { + if x < X6 { if x < X5 {(4, X4)} else {(5, X5)} } + else if x < X8 { if x < X7 {(6, X6)} else {(7, X7)} } + else { if x < X9 {(8, X8)} else {(9, X9)} } + } +} + +/// The shortest mode implementation for Grisu. +/// +/// It returns `None` when it would return an inexact representation otherwise. +pub fn format_shortest_opt(d: &Decoded, + buf: &mut [u8]) -> Option<(/*#digits*/ usize, /*exp*/ i16)> { + assert!(d.mant > 0); + assert!(d.minus > 0); + assert!(d.plus > 0); + assert!(d.mant.checked_add(d.plus).is_some()); + assert!(d.mant.checked_sub(d.minus).is_some()); + assert!(buf.len() >= MAX_SIG_DIGITS); + assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision + + // start with the normalized values with the shared exponent + let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize(); + let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e); + let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e); + + // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`. + // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`; + // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`. + // + // it is obviously desirable to maximize `GAMMA - ALPHA`, + // so that we don't need many cached powers of 10, but there are some considerations: + // + // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division. + // (this is not really avoidable, remainder is required for accuracy estimation.) + // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10, + // and it should not overflow. + // + // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`; + // -60 and -32 is the maximal range with this constraint, and V8 also uses them. + let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64); + + // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1). + let plus = plus.mul(&cached); + let minus = minus.mul(&cached); + let v = v.mul(&cached); + debug_assert_eq!(plus.e, minus.e); + debug_assert_eq!(plus.e, v.e); + + // +- actual range of minus + // | <---|---------------------- unsafe region --------------------------> | + // | | | + // | |<--->| | <--------------- safe region ---------------> | | + // | | | | | | + // |1 ulp|1 ulp| |1 ulp|1 ulp| |1 ulp|1 ulp| + // |<--->|<--->| |<--->|<--->| |<--->|<--->| + // |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----| + // | minus | | v | | plus | + // minus1 minus0 v - 1 ulp v + 1 ulp plus0 plus1 + // + // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp). + // as we don't know the error is positive or negative, we use two approximations spaced equally + // and have the maximal error of 2 ulps. + // + // the "unsafe region" is a liberal interval which we initially generate. + // the "safe region" is a conservative interval which we only accept. + // we start with the correct repr within the unsafe region, and try to find the closest repr + // to `v` which is also within the safe region. if we can't, we give up. + let plus1 = plus.f + 1; +// let plus0 = plus.f - 1; // only for explanation +// let minus0 = minus.f + 1; // only for explanation + let minus1 = minus.f - 1; + let e = -plus.e as usize; // shared exponent + + // divide `plus1` into integral and fractional parts. + // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32` + // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement. + let plus1int = (plus1 >> e) as u32; + let plus1frac = plus1 & ((1 << e) - 1); + + // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`). + // this is an upper bound of `kappa` below. + let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int); + + let mut i = 0; + let exp = max_kappa as i16 - minusk + 1; + + // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`, + // then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest + // representations (with the minimal number of significant digits) in that range. + // + // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2. + // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead. + // (e.g. `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.) + // the algorithm relies on the later verification phase to exclude `y`. + let delta1 = plus1 - minus1; +// let delta1int = (delta1 >> e) as usize; // only for explanation + let delta1frac = delta1 & ((1 << e) - 1); + + // render integral parts, while checking for the accuracy at each step. + let mut kappa = max_kappa as i16; + let mut ten_kappa = max_ten_kappa; // 10^kappa + let mut remainder = plus1int; // digits yet to be rendered + loop { // we always have at least one digit to render, as `plus1 >= 10^kappa` + // invariants: + // - `delta1int <= remainder < 10^(kappa+1)` + // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder` + // (it follows that `remainder = plus1int % 10^(kappa+1)`) + + // divide `remainder` by `10^kappa`. both are scaled by `2^-e`. + let q = remainder / ten_kappa; + let r = remainder % ten_kappa; + debug_assert!(q < 10); + buf[i] = b'0' + q as u8; + i += 1; + + let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e + if plus1rem < delta1 { + // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`. + let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent + return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1); + } + + // break the loop when we have rendered all integral digits. + // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`. + if i > max_kappa as usize { + debug_assert_eq!(ten_kappa, 1); + debug_assert_eq!(kappa, 0); + break; + } + + // restore invariants + kappa -= 1; + ten_kappa /= 10; + remainder = r; + } + + // render fractional parts, while checking for the accuracy at each step. + // this time we rely on repeated multiplications, as division will lose the precision. + let mut remainder = plus1frac; + let mut threshold = delta1frac; + let mut ulp = 1; + loop { // the next digit should be significant as we've tested that before breaking out + // invariants, where `m = max_kappa + 1` (# of digits in the integral part): + // - `remainder < 2^e` + // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder` + + remainder *= 10; // won't overflow, `2^e * 10 < 2^64` + threshold *= 10; + ulp *= 10; + + // divide `remainder` by `10^kappa`. + // both are scaled by `2^e / 10^kappa`, so the latter is implicit here. + let q = remainder >> e; + let r = remainder & ((1 << e) - 1); + debug_assert!(q < 10); + buf[i] = b'0' + q as u8; + i += 1; + + if r < threshold { + let ten_kappa = 1 << e; // implicit divisor + return round_and_weed(&mut buf[..i], exp, r, threshold, + (plus1 - v.f) * ulp, ten_kappa, ulp); + } + + // restore invariants + kappa -= 1; + remainder = r; + } + + // we've generated all significant digits of `plus1`, but not sure if it's the optimal one. + // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different + // shortest representation from 3.14154 to 3.14158 but we only have the greatest one. + // we have to successively decrease the last digit and check if this is the optimal repr. + // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase) + // + // the function checks if this "optimal" repr is actually within the ulp ranges, + // and also, it is possible that the "second-to-optimal" repr can actually be optimal + // due to the rounding error. in either cases this returns `None`. ("weeding" phase) + // + // all arguments here are scaled by the common (but implicit) value `k`, so that: + // - `remainder = (plus1 % 10^kappa) * k` + // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`) + // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants) + // - `ten_kappa = 10^kappa * k` + // - `ulp = 2^-e * k` + fn round_and_weed(buf: &mut [u8], exp: i16, remainder: u64, threshold: u64, plus1v: u64, + ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> { + assert!(!buf.is_empty()); + + // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps. + // the resulting representation should be the closest representation to both. + // + // here `plus1 - v` is used since calculations are done with respect to `plus1` + // in order to avoid overflow/underflow (hence the seemingly swapped names). + let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp) + let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp) + + // decrease the last digit and stop at the closest representation to `v + 1 ulp`. + let mut plus1w = remainder; // plus1w(n) = plus1 - w(n) + { + let last = buf.last_mut().unwrap(); + + // we work with the approximated digits `w(n)`, which is initially equal to `plus1 - + // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 - + // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) = + // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks. + // note that `plus1w(n)` is always increasing. + // + // we have three conditions to terminate. any of them will make the loop unable to + // proceed, but we then have at least one valid representation known to be closest to + // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity. + // + // TC1: `w(n) <= v + 1 ulp`, i.e. this is the last repr that can be the closest one. + // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`. + // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible + // overflow on the calculation of `plus1w(n)`. + // + // TC2: `w(n+1) < minus1`, i.e. the next repr definitely does not round to `v`. + // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa > + // plus1 - minus1 = threshold`. the left hand side can overflow, but we know + // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) > + // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if + // `threshold - plus1w(n) < 10^kappa` instead. + // + // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e. the next repr is + // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`, + // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have + // `z(n) > 0`. we have two cases to consider: + // + // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing, + // `z(n)` should be decreasing and this is clearly false. + // - when `z(n+1) < 0`: + // - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is + // false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow. + // - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e. `plus1v_up - plus1w(n) >= + // plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1 + // gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when + // combined with TC3a. + // + // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is + // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`. + while plus1w < plus1v_up && + threshold - plus1w >= ten_kappa && + (plus1w + ten_kappa < plus1v_up || + plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) { + *last -= 1; + debug_assert!(*last > b'0'); // the shortest repr cannot end with `0` + plus1w += ten_kappa; + } + } + + // check if this representation is also the closest representation to `v - 1 ulp`. + // + // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up` + // replaced by `plus1v_down` instead. overflow analysis equally holds. + if plus1w < plus1v_down && + threshold - plus1w >= ten_kappa && + (plus1w + ten_kappa < plus1v_down || + plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) { + return None; + } + + // now we have the closest representation to `v` between `plus1` and `minus1`. + // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`, + // i.e. `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts + // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`. + if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp { + Some((buf.len(), exp)) + } else { + None + } + } +} + +/// The shortest mode implementation for Grisu with Dragon fallback. +/// +/// This should be used for most cases. +pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) { + use num::flt2dec::strategy::dragon::format_shortest as fallback; + match format_shortest_opt(d, buf) { + Some(ret) => ret, + None => fallback(d, buf), + } +} + +/// The exact and fixed mode implementation for Grisu. +/// +/// It returns `None` when it would return an inexact representation otherwise. +pub fn format_exact_opt(d: &Decoded, buf: &mut [u8], limit: i16) + -> Option<(/*#digits*/ usize, /*exp*/ i16)> { + assert!(d.mant > 0); + assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision + assert!(!buf.is_empty()); + + // normalize and scale `v`. + let v = Fp { f: d.mant, e: d.exp }.normalize(); + let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64); + let v = v.mul(&cached); + + // divide `v` into integral and fractional parts. + let e = -v.e as usize; + let vint = (v.f >> e) as u32; + let vfrac = v.f & ((1 << e) - 1); + + // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1). + // as we don't know the error is positive or negative, we use two approximations + // spaced equally and have the maximal error of 2 ulps (same to the shortest case). + // + // the goal is to find the exactly rounded series of digits that are common to + // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident. + // if this is not possible, we don't know which one is the correct output for `v`, + // so we give up and fall back. + // + // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`), + // and we will scale it whenever `v` gets scaled. + let mut err = 1; + + // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`). + // this is an upper bound of `kappa` below. + let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint); + + let mut i = 0; + let exp = max_kappa as i16 - minusk + 1; + + // if we are working with the last-digit limitation, we need to shorten the buffer + // before the actual rendering in order to avoid double rounding. + // note that we have to enlarge the buffer again when rounding up happens! + let len = if exp <= limit { + // oops, we cannot even produce *one* digit. + // this is possible when, say, we've got something like 9.5 and it's being rounded to 10. + // + // in principle we can immediately call `possibly_round` with an empty buffer, + // but scaling `max_ten_kappa << e` by 10 can result in overflow. + // thus we are being sloppy here and widen the error range by a factor of 10. + // this will increase the false negative rate, but only very, *very* slightly; + // it can only matter noticably when the mantissa is bigger than 60 bits. + return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e); + } else if ((exp as i32 - limit as i32) as usize) < buf.len() { + (exp - limit) as usize + } else { + buf.len() + }; + debug_assert!(len > 0); + + // render integral parts. + // the error is entirely fractional, so we don't need to check it in this part. + let mut kappa = max_kappa as i16; + let mut ten_kappa = max_ten_kappa; // 10^kappa + let mut remainder = vint; // digits yet to be rendered + loop { // we always have at least one digit to render + // invariants: + // - `remainder < 10^(kappa+1)` + // - `vint = d[0..n-1] * 10^(kappa+1) + remainder` + // (it follows that `remainder = vint % 10^(kappa+1)`) + + // divide `remainder` by `10^kappa`. both are scaled by `2^-e`. + let q = remainder / ten_kappa; + let r = remainder % ten_kappa; + debug_assert!(q < 10); + buf[i] = b'0' + q as u8; + i += 1; + + // is the buffer full? run the rounding pass with the remainder. + if i == len { + let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e + return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e); + } + + // break the loop when we have rendered all integral digits. + // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`. + if i > max_kappa as usize { + debug_assert_eq!(ten_kappa, 1); + debug_assert_eq!(kappa, 0); + break; + } + + // restore invariants + kappa -= 1; + ten_kappa /= 10; + remainder = r; + } + + // render fractional parts. + // + // in principle we can continue to the last available digit and check for the accuracy. + // unfortunately we are working with the finite-sized integers, so we need some criterion + // to detect the overflow. V8 uses `remainder > err`, which becomes false when + // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects + // too many otherwise valid input. + // + // since the later phase has a correct overflow detection, we instead use tighter criterion: + // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and + // `v + 1 ulp` definitely contains two or more rounded representations. this is same to + // the first two comparisons from `possibly_round`, for the reference. + let mut remainder = vfrac; + let maxerr = 1 << (e - 1); + while err < maxerr { + // invariants, where `m = max_kappa + 1` (# of digits in the integral part): + // - `remainder < 2^e` + // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder` + // - `err = 10^(n-m)` + + remainder *= 10; // won't overflow, `2^e * 10 < 2^64` + err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64` + + // divide `remainder` by `10^kappa`. + // both are scaled by `2^e / 10^kappa`, so the latter is implicit here. + let q = remainder >> e; + let r = remainder & ((1 << e) - 1); + debug_assert!(q < 10); + buf[i] = b'0' + q as u8; + i += 1; + + // is the buffer full? run the rounding pass with the remainder. + if i == len { + return possibly_round(buf, len, exp, limit, r, 1 << e, err); + } + + // restore invariants + remainder = r; + } + + // further calculation is useless (`possibly_round` definitely fails), so we give up. + return None; + + // we've generated all requested digits of `v`, which should be also same to corresponding + // digits of `v - 1 ulp`. now we check if there is a unique representation shared by + // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or + // to the rounded-up version of those digits. if the range contains multiple representations + // of the same length, we cannot be sure and should return `None` instead. + // + // all arguments here are scaled by the common (but implicit) value `k`, so that: + // - `remainder = (v % 10^kappa) * k` + // - `ten_kappa = 10^kappa * k` + // - `ulp = 2^-e * k` + fn possibly_round(buf: &mut [u8], mut len: usize, mut exp: i16, limit: i16, + remainder: u64, ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> { + debug_assert!(remainder < ten_kappa); + + // 10^kappa + // : : :<->: : + // : : : : : + // :|1 ulp|1 ulp| : + // :|<--->|<--->| : + // ----|-----|-----|---- + // | v | + // v - 1 ulp v + 1 ulp + // + // (for the reference, the dotted line indicates the exact value for + // possible representations in given number of digits.) + // + // error is too large that there are at least three possible representations + // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct. + if ulp >= ten_kappa { return None; } + + // 10^kappa + // :<------->: + // : : + // : |1 ulp|1 ulp| + // : |<--->|<--->| + // ----|-----|-----|---- + // | v | + // v - 1 ulp v + 1 ulp + // + // in fact, 1/2 ulp is enough to introduce two possible representations. + // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.) + // this won't overflow, as `ulp < ten_kappa` from the first check. + if ten_kappa - ulp <= ulp { return None; } + + // remainder + // :<->| : + // : | : + // :<--------- 10^kappa ---------->: + // | : | : + // |1 ulp|1 ulp| : + // |<--->|<--->| : + // ----|-----|-----|------------------------ + // | v | + // v - 1 ulp v + 1 ulp + // + // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`), + // then we can safely return. note that `v - 1 ulp` *can* be less than the current + // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough: + // the distance between `v - 1 ulp` and the current representation + // cannot exceed `10^kappa / 2`. + // + // the condition equals to `remainder + ulp < 10^kappa / 2`. + // since this can easily overflow, first check if `remainder < 10^kappa / 2`. + // we've already verified that `ulp < 10^kappa / 2`, so as long as + // `10^kappa` did not overflow after all, the second check is fine. + if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp { + return Some((len, exp)); + } + + // :<------- remainder ------>| : + // : | : + // :<--------- 10^kappa --------->: + // : | | : | + // : |1 ulp|1 ulp| + // : |<--->|<--->| + // -----------------------|-----|-----|----- + // | v | + // v - 1 ulp v + 1 ulp + // + // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation, + // we should round up and return. for the same reason we don't need to check `v + 1 ulp`. + // + // the condition equals to `remainder - ulp >= 10^kappa / 2`. + // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`, + // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`, + // so the second check does not overflow. + if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp { + if let Some(c) = round_up(buf, len) { + // only add an additional digit when we've been requested the fixed precision. + // we also need to check that, if the original buffer was empty, + // the additional digit can only be added when `exp == limit` (edge case). + exp += 1; + if exp > limit && len < buf.len() { + buf[len] = c; + len += 1; + } + } + return Some((len, exp)); + } + + // otherwise we are doomed (i.e. some values between `v - 1 ulp` and `v + 1 ulp` are + // rounding down and others are rounding up) and give up. + None + } +} + +/// The exact and fixed mode implementation for Grisu with Dragon fallback. +/// +/// This should be used for most cases. +pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) { + use num::flt2dec::strategy::dragon::format_exact as fallback; + match format_exact_opt(d, buf, limit) { + Some(ret) => ret, + None => fallback(d, buf, limit), + } +} + diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 71c2b38287e..011830ddb78 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -44,6 +44,9 @@ pub struct Wrapping(#[stable(feature = "rust1", since = "1.0.0")] pub T); #[unstable(feature = "core", reason = "may be removed or relocated")] pub mod wrapping; +#[unstable(feature = "core", reason = "internal routines only exposed for testing")] +pub mod flt2dec; + /// Types that have a "zero" value. /// /// This trait is intended for use in conjunction with `Add`, as an identity: -- cgit 1.4.1-3-g733a5