about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2014-05-06 10:16:40 -0700
committerbors <bors@rust-lang.org>2014-05-06 10:16:40 -0700
commit1f6db7f4f6399628eaaca5d38cc5fb38e71b4c23 (patch)
treeb00307d3ec57cb86241e20ff361839c64dc39c76
parentc600dc0f535b4afc7722bda4154e35f948e3f6b2 (diff)
parenta8da4f7309f1684d7c229de494c2527b6089b314 (diff)
downloadrust-1f6db7f4f6399628eaaca5d38cc5fb38e71b4c23.tar.gz
rust-1f6db7f4f6399628eaaca5d38cc5fb38e71b4c23.zip
auto merge of #13822 : EdorianDark/rust/master, r=cmr
New attempt to generalize stats, after #12606.
Since #12355 did not get merged, i want go get first get my change done and the try to fix sum.
-rw-r--r--src/libtest/lib.rs4
-rw-r--r--src/libtest/stats.rs209
2 files changed, 115 insertions, 98 deletions
diff --git a/src/libtest/lib.rs b/src/libtest/lib.rs
index 8c0abe32e3c..fa55444fe5b 100644
--- a/src/libtest/lib.rs
+++ b/src/libtest/lib.rs
@@ -413,7 +413,7 @@ pub fn opt_shard(maybestr: Option<~str>) -> Option<(uint,uint)> {
 
 #[deriving(Clone, Eq)]
 pub struct BenchSamples {
-    ns_iter_summ: stats::Summary,
+    ns_iter_summ: stats::Summary<f64>,
     mb_s: uint,
 }
 
@@ -1249,7 +1249,7 @@ impl Bencher {
     }
 
     // This is a more statistics-driven benchmark algorithm
-    pub fn auto_bench(&mut self, f: |&mut Bencher|) -> stats::Summary {
+    pub fn auto_bench(&mut self, f: |&mut Bencher|) -> stats::Summary<f64> {
 
         // Initial bench run to get ballpark figure.
         let mut n = 1_u64;
diff --git a/src/libtest/stats.rs b/src/libtest/stats.rs
index 67b5c557b59..e73a43efe76 100644
--- a/src/libtest/stats.rs
+++ b/src/libtest/stats.rs
@@ -14,12 +14,11 @@ use std::hash::Hash;
 use std::io;
 use std::mem;
 use std::num;
+use std::num::Zero;
 use collections::hashmap;
+use std::fmt::Show;
 
-// NB: this can probably be rewritten in terms of num::Num
-// to be less f64-specific.
-
-fn f64_cmp(x: f64, y: f64) -> Ordering {
+fn local_cmp<T:Float>(x: T, y: T) -> Ordering {
     // arbitrarily decide that NaNs are larger than everything.
     if y.is_nan() {
         Less
@@ -34,12 +33,12 @@ fn f64_cmp(x: f64, y: f64) -> Ordering {
     }
 }
 
-fn f64_sort(v: &mut [f64]) {
-    v.sort_by(|x: &f64, y: &f64| f64_cmp(*x, *y));
+fn local_sort<T: Float>(v: &mut [T]) {
+    v.sort_by(|x: &T, y: &T| local_cmp(*x, *y));
 }
 
 /// Trait that provides simple descriptive statistics on a univariate set of numeric samples.
-pub trait Stats {
+pub trait Stats <T: Float + FromPrimitive>{
 
     /// Sum of the samples.
     ///
@@ -48,24 +47,24 @@ pub trait Stats {
     /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates"]
     /// (http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps)
     /// *Discrete & Computational Geometry 18*, 3 (Oct 1997), 305-363, Shewchuk J.R.
-    fn sum(self) -> f64;
+    fn sum(self) -> T;
 
     /// Minimum value of the samples.
-    fn min(self) -> f64;
+    fn min(self) -> T;
 
     /// Maximum value of the samples.
-    fn max(self) -> f64;
+    fn max(self) -> T;
 
     /// Arithmetic mean (average) of the samples: sum divided by sample-count.
     ///
     /// See: https://en.wikipedia.org/wiki/Arithmetic_mean
-    fn mean(self) -> f64;
+    fn mean(self) -> T;
 
     /// Median of the samples: value separating the lower half of the samples from the higher half.
     /// Equal to `self.percentile(50.0)`.
     ///
     /// See: https://en.wikipedia.org/wiki/Median
-    fn median(self) -> f64;
+    fn median(self) -> T;
 
     /// Variance of the samples: bias-corrected mean of the squares of the differences of each
     /// sample from the sample mean. Note that this calculates the _sample variance_ rather than the
@@ -74,7 +73,7 @@ pub trait Stats {
     /// than `n`.
     ///
     /// See: https://en.wikipedia.org/wiki/Variance
-    fn var(self) -> f64;
+    fn var(self) -> T;
 
     /// Standard deviation: the square root of the sample variance.
     ///
@@ -82,13 +81,13 @@ pub trait Stats {
     /// `median_abs_dev` for unknown distributions.
     ///
     /// See: https://en.wikipedia.org/wiki/Standard_deviation
-    fn std_dev(self) -> f64;
+    fn std_dev(self) -> T;
 
     /// Standard deviation as a percent of the mean value. See `std_dev` and `mean`.
     ///
     /// Note: this is not a robust statistic for non-normal distributions. Prefer the
     /// `median_abs_dev_pct` for unknown distributions.
-    fn std_dev_pct(self) -> f64;
+    fn std_dev_pct(self) -> T;
 
     /// Scaled median of the absolute deviations of each sample from the sample median. This is a
     /// robust (distribution-agnostic) estimator of sample variability. Use this in preference to
@@ -97,10 +96,10 @@ pub trait Stats {
     /// deviation.
     ///
     /// See: http://en.wikipedia.org/wiki/Median_absolute_deviation
-    fn median_abs_dev(self) -> f64;
+    fn median_abs_dev(self) -> T;
 
     /// Median absolute deviation as a percent of the median. See `median_abs_dev` and `median`.
-    fn median_abs_dev_pct(self) -> f64;
+    fn median_abs_dev_pct(self) -> T;
 
     /// Percentile: the value below which `pct` percent of the values in `self` fall. For example,
     /// percentile(95.0) will return the value `v` such that 95% of the samples `s` in `self`
@@ -109,7 +108,7 @@ pub trait Stats {
     /// Calculated by linear interpolation between closest ranks.
     ///
     /// See: http://en.wikipedia.org/wiki/Percentile
-    fn percentile(self, pct: f64) -> f64;
+    fn percentile(self, pct: T) -> T;
 
     /// Quartiles of the sample: three values that divide the sample into four equal groups, each
     /// with 1/4 of the data. The middle value is the median. See `median` and `percentile`. This
@@ -117,37 +116,37 @@ pub trait Stats {
     /// is otherwise equivalent.
     ///
     /// See also: https://en.wikipedia.org/wiki/Quartile
-    fn quartiles(self) -> (f64,f64,f64);
+    fn quartiles(self) -> (T,T,T);
 
     /// Inter-quartile range: the difference between the 25th percentile (1st quartile) and the 75th
     /// percentile (3rd quartile). See `quartiles`.
     ///
     /// See also: https://en.wikipedia.org/wiki/Interquartile_range
-    fn iqr(self) -> f64;
+    fn iqr(self) -> T;
 }
 
 /// Extracted collection of all the summary statistics of a sample set.
 #[deriving(Clone, Eq)]
 #[allow(missing_doc)]
-pub struct Summary {
-    pub sum: f64,
-    pub min: f64,
-    pub max: f64,
-    pub mean: f64,
-    pub median: f64,
-    pub var: f64,
-    pub std_dev: f64,
-    pub std_dev_pct: f64,
-    pub median_abs_dev: f64,
-    pub median_abs_dev_pct: f64,
-    pub quartiles: (f64,f64,f64),
-    pub iqr: f64,
+pub struct Summary<T> {
+    pub sum: T,
+    pub min: T,
+    pub max: T,
+    pub mean: T,
+    pub median: T,
+    pub var: T,
+    pub std_dev: T,
+    pub std_dev_pct: T,
+    pub median_abs_dev: T,
+    pub median_abs_dev_pct: T,
+    pub quartiles: (T,T,T),
+    pub iqr: T,
 }
 
-impl Summary {
+impl<T: Float + FromPrimitive> Summary<T> {
 
     /// Construct a new summary of a sample set.
-    pub fn new(samples: &[f64]) -> Summary {
+    pub fn new(samples: &[T]) -> Summary<T> {
         Summary {
             sum: samples.sum(),
             min: samples.min(),
@@ -165,11 +164,11 @@ impl Summary {
     }
 }
 
-impl<'a> Stats for &'a [f64] {
+impl<'a,T: Float + FromPrimitive> Stats<T> for &'a [T] {
 
     // FIXME #11059 handle NaN, inf and overflow
     #[allow(deprecated_owned_vector)]
-    fn sum(self) -> f64 {
+    fn sum(self) -> T {
         let mut partials = vec![];
 
         for &mut x in self.iter() {
@@ -185,7 +184,7 @@ impl<'a> Stats for &'a [f64] {
                 // `lo`. Together `hi+lo` are exactly equal to `x+y`.
                 let hi = x + y;
                 let lo = y - (hi - x);
-                if lo != 0f64 {
+                if !lo.is_zero() {
                     *partials.get_mut(j) = lo;
                     j += 1;
                 }
@@ -198,81 +197,89 @@ impl<'a> Stats for &'a [f64] {
                 partials.truncate(j+1);
             }
         }
-        partials.iter().fold(0.0, |p, q| p + *q)
+        let zero: T = Zero::zero();
+        partials.iter().fold(zero, |p, q| p + *q)
     }
 
-    fn min(self) -> f64 {
+    fn min(self) -> T {
         assert!(self.len() != 0);
         self.iter().fold(self[0], |p, q| p.min(*q))
     }
 
-    fn max(self) -> f64 {
+    fn max(self) -> T {
         assert!(self.len() != 0);
         self.iter().fold(self[0], |p, q| p.max(*q))
     }
 
-    fn mean(self) -> f64 {
+    fn mean(self) -> T {
         assert!(self.len() != 0);
-        self.sum() / (self.len() as f64)
+        self.sum() / FromPrimitive::from_uint(self.len()).unwrap()
     }
 
-    fn median(self) -> f64 {
-        self.percentile(50.0)
+    fn median(self) -> T {
+        self.percentile(FromPrimitive::from_uint(50).unwrap())
     }
 
-    fn var(self) -> f64 {
+    fn var(self) -> T {
         if self.len() < 2 {
-            0.0
+            Zero::zero()
         } else {
             let mean = self.mean();
-            let mut v = 0.0;
+            let mut v: T = Zero::zero();
             for s in self.iter() {
                 let x = *s - mean;
-                v += x*x;
+                v = v + x*x;
             }
             // NB: this is _supposed to be_ len-1, not len. If you
             // change it back to len, you will be calculating a
             // population variance, not a sample variance.
-            v/((self.len()-1) as f64)
+            let denom = FromPrimitive::from_uint(self.len()-1).unwrap();
+            v/denom
         }
     }
 
-    fn std_dev(self) -> f64 {
+    fn std_dev(self) -> T {
         self.var().sqrt()
     }
 
-    fn std_dev_pct(self) -> f64 {
-        (self.std_dev() / self.mean()) * 100.0
+    fn std_dev_pct(self) -> T {
+        let hundred = FromPrimitive::from_uint(100).unwrap();
+        (self.std_dev() / self.mean()) * hundred
     }
 
-    fn median_abs_dev(self) -> f64 {
+    fn median_abs_dev(self) -> T {
         let med = self.median();
-        let abs_devs: Vec<f64> = self.iter().map(|&v| num::abs(med - v)).collect();
+        let abs_devs: Vec<T> = self.iter().map(|&v| num::abs(med - v)).collect();
         // This constant is derived by smarter statistics brains than me, but it is
         // consistent with how R and other packages treat the MAD.
-        abs_devs.as_slice().median() * 1.4826
+        let number = FromPrimitive::from_f64(1.4826).unwrap();
+        abs_devs.as_slice().median() * number
     }
 
-    fn median_abs_dev_pct(self) -> f64 {
-        (self.median_abs_dev() / self.median()) * 100.0
+    fn median_abs_dev_pct(self) -> T {
+        let hundred = FromPrimitive::from_uint(100).unwrap();
+        (self.median_abs_dev() / self.median()) * hundred
     }
 
-    fn percentile(self, pct: f64) -> f64 {
+    fn percentile(self, pct: T) -> T {
         let mut tmp = Vec::from_slice(self);
-        f64_sort(tmp.as_mut_slice());
+        local_sort(tmp.as_mut_slice());
         percentile_of_sorted(tmp.as_slice(), pct)
     }
 
-    fn quartiles(self) -> (f64,f64,f64) {
+    fn quartiles(self) -> (T,T,T) {
         let mut tmp = Vec::from_slice(self);
-        f64_sort(tmp.as_mut_slice());
-        let a = percentile_of_sorted(tmp.as_slice(), 25.0);
-        let b = percentile_of_sorted(tmp.as_slice(), 50.0);
-        let c = percentile_of_sorted(tmp.as_slice(), 75.0);
+        local_sort(tmp.as_mut_slice());
+        let first = FromPrimitive::from_uint(25).unwrap();
+        let a = percentile_of_sorted(tmp.as_slice(), first);
+        let secound = FromPrimitive::from_uint(50).unwrap();
+        let b = percentile_of_sorted(tmp.as_slice(), secound);
+        let third = FromPrimitive::from_uint(75).unwrap();
+        let c = percentile_of_sorted(tmp.as_slice(), third);
         (a,b,c)
     }
 
-    fn iqr(self) -> f64 {
+    fn iqr(self) -> T {
         let (a,_,c) = self.quartiles();
         c - a
     }
@@ -281,21 +288,24 @@ impl<'a> Stats for &'a [f64] {
 
 // Helper function: extract a value representing the `pct` percentile of a sorted sample-set, using
 // linear interpolation. If samples are not sorted, return nonsensical value.
-fn percentile_of_sorted(sorted_samples: &[f64],
-                             pct: f64) -> f64 {
+fn percentile_of_sorted<T: Float + FromPrimitive>(sorted_samples: &[T],
+                                                             pct: T) -> T {
     assert!(sorted_samples.len() != 0);
     if sorted_samples.len() == 1 {
         return sorted_samples[0];
     }
-    assert!(0.0 <= pct);
-    assert!(pct <= 100.0);
-    if pct == 100.0 {
+    let zero: T = Zero::zero();
+    assert!(zero <= pct);
+    let hundred = FromPrimitive::from_uint(100).unwrap();
+    assert!(pct <= hundred);
+    if pct == hundred {
         return sorted_samples[sorted_samples.len() - 1];
     }
-    let rank = (pct / 100.0) * ((sorted_samples.len() - 1) as f64);
+    let length = FromPrimitive::from_uint(sorted_samples.len() - 1).unwrap();
+    let rank = (pct / hundred) * length;
     let lrank = rank.floor();
     let d = rank - lrank;
-    let n = lrank as uint;
+    let n = lrank.to_uint().unwrap();
     let lo = sorted_samples[n];
     let hi = sorted_samples[n+1];
     lo + (hi - lo) * d
@@ -308,11 +318,12 @@ fn percentile_of_sorted(sorted_samples: &[f64],
 /// change the number of samples, just changes the values of those that are outliers.
 ///
 /// See: http://en.wikipedia.org/wiki/Winsorising
-pub fn winsorize(samples: &mut [f64], pct: f64) {
+pub fn winsorize<T: Float + FromPrimitive>(samples: &mut [T], pct: T) {
     let mut tmp = Vec::from_slice(samples);
-    f64_sort(tmp.as_mut_slice());
+    local_sort(tmp.as_mut_slice());
     let lo = percentile_of_sorted(tmp.as_slice(), pct);
-    let hi = percentile_of_sorted(tmp.as_slice(), 100.0-pct);
+    let hundred: T = FromPrimitive::from_uint(100).unwrap();
+    let hi = percentile_of_sorted(tmp.as_slice(), hundred-pct);
     for samp in samples.mut_iter() {
         if *samp > hi {
             *samp = hi
@@ -323,8 +334,8 @@ pub fn winsorize(samples: &mut [f64], pct: f64) {
 }
 
 /// Render writes the min, max and quartiles of the provided `Summary` to the provided `Writer`.
-pub fn write_5_number_summary(w: &mut io::Writer,
-                              s: &Summary) -> io::IoResult<()> {
+pub fn write_5_number_summary<T: Float + Show>(w: &mut io::Writer,
+                                               s: &Summary<T>) -> io::IoResult<()> {
     let (q1,q2,q3) = s.quartiles;
     write!(w, "(min={}, q1={}, med={}, q3={}, max={})",
                      s.min,
@@ -346,24 +357,29 @@ pub fn write_5_number_summary(w: &mut io::Writer,
 ///   10 |        [--****#******----------]          | 40
 /// ~~~~
 
-pub fn write_boxplot(w: &mut io::Writer, s: &Summary,
-                     width_hint: uint) -> io::IoResult<()> {
+pub fn write_boxplot<T: Float + Show + FromPrimitive>(
+                     w: &mut io::Writer,
+                     s: &Summary<T>,
+                     width_hint: uint)
+                      -> io::IoResult<()> {
 
     let (q1,q2,q3) = s.quartiles;
 
     // the .abs() handles the case where numbers are negative
-    let lomag = 10.0_f64.powf(s.min.abs().log10().floor());
-    let himag = 10.0_f64.powf(s.max.abs().log10().floor());
+    let ten: T = FromPrimitive::from_uint(10).unwrap();
+    let lomag = ten.powf(s.min.abs().log10().floor());
+    let himag = ten.powf(s.max.abs().log10().floor());
 
     // need to consider when the limit is zero
-    let lo = if lomag == 0.0 {
-        0.0
+    let zero: T = Zero::zero();
+    let lo = if lomag.is_zero() {
+        zero
     } else {
         (s.min / lomag).floor() * lomag
     };
 
-    let hi = if himag == 0.0 {
-        0.0
+    let hi = if himag.is_zero() {
+        zero
     } else {
         (s.max / himag).ceil() * himag
     };
@@ -374,8 +390,9 @@ pub fn write_boxplot(w: &mut io::Writer, s: &Summary,
     let histr = hi.to_str();
 
     let overhead_width = lostr.len() + histr.len() + 4;
-    let range_width = width_hint - overhead_width;;
-    let char_step = range / (range_width as f64);
+    let range_width = width_hint - overhead_width;
+    let range_float = FromPrimitive::from_uint(range_width).unwrap();
+    let char_step = range / range_float;
 
     try!(write!(w, "{} |", lostr));
 
@@ -384,37 +401,37 @@ pub fn write_boxplot(w: &mut io::Writer, s: &Summary,
 
     while c < range_width && v < s.min {
         try!(write!(w, " "));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
     try!(write!(w, "["));
     c += 1;
     while c < range_width && v < q1 {
         try!(write!(w, "-"));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
     while c < range_width && v < q2 {
         try!(write!(w, "*"));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
     try!(write!(w, r"\#"));
     c += 1;
     while c < range_width && v < q3 {
         try!(write!(w, "*"));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
     while c < range_width && v < s.max {
         try!(write!(w, "-"));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
     try!(write!(w, "]"));
     while c < range_width {
         try!(write!(w, " "));
-        v += char_step;
+        v = v + char_step;
         c += 1;
     }
 
@@ -452,7 +469,7 @@ mod tests {
         })
     )
 
-    fn check(samples: &[f64], summ: &Summary) {
+    fn check(samples: &[f64], summ: &Summary<f64>) {
 
         let summ2 = Summary::new(samples);
 
@@ -1011,7 +1028,7 @@ mod tests {
     #[test]
     fn test_boxplot_nonpositive() {
         #[allow(deprecated_owned_vector)]
-        fn t(s: &Summary, expected: ~str) {
+        fn t(s: &Summary<f64>, expected: ~str) {
             use std::io::MemWriter;
             let mut m = MemWriter::new();
             write_boxplot(&mut m as &mut io::Writer, s, 30).unwrap();