diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2014-03-14 11:16:10 -0700 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2014-03-14 13:59:02 -0700 |
| commit | 58e4ab2b33f559107dbdfa9d3cab882cf8029481 (patch) | |
| tree | 749ec81e1a287e6ce082c201d97cec7243612a79 /src/libextra | |
| parent | e99d523707c8058383e7a551e49d59ce622d5765 (diff) | |
| download | rust-58e4ab2b33f559107dbdfa9d3cab882cf8029481.tar.gz rust-58e4ab2b33f559107dbdfa9d3cab882cf8029481.zip | |
extra: Put the nail in the coffin, delete libextra
This commit shreds all remnants of libextra from the compiler and standard distribution. Two modules, c_vec/tempfile, were moved into libstd after some cleanup, and the other modules were moved to separate crates as seen fit. Closes #8784 Closes #12413 Closes #12576
Diffstat (limited to 'src/libextra')
| -rw-r--r-- | src/libextra/c_vec.rs | 240 | ||||
| -rw-r--r-- | src/libextra/lib.rs | 52 | ||||
| -rw-r--r-- | src/libextra/stats.rs | 1057 | ||||
| -rw-r--r-- | src/libextra/tempfile.rs | 84 | ||||
| -rw-r--r-- | src/libextra/unicode.rs | 262 | ||||
| -rw-r--r-- | src/libextra/url.rs | 1277 | ||||
| -rw-r--r-- | src/libextra/workcache.rs | 516 |
7 files changed, 0 insertions, 3488 deletions
diff --git a/src/libextra/c_vec.rs b/src/libextra/c_vec.rs deleted file mode 100644 index b93ffce636b..00000000000 --- a/src/libextra/c_vec.rs +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! - * Library to interface with chunks of memory allocated in C. - * - * It is often desirable to safely interface with memory allocated from C, - * encapsulating the unsafety into allocation and destruction time. Indeed, - * allocating memory externally is currently the only way to give Rust shared - * mut state with C programs that keep their own references; vectors are - * unsuitable because they could be reallocated or moved at any time, and - * importing C memory into a vector takes a one-time snapshot of the memory. - * - * This module simplifies the usage of such external blocks of memory. Memory - * is encapsulated into an opaque object after creation; the lifecycle of the - * memory can be optionally managed by Rust, if an appropriate destructor - * closure is provided. Safety is ensured by bounds-checking accesses, which - * are marshalled through get and set functions. - * - * There are three unsafe functions: the two constructors, and the - * unwrap method. The constructors are unsafe for the - * obvious reason (they act on a pointer that cannot be checked inside the - * method), but `unwrap()` is somewhat more subtle in its unsafety. - * It returns the contained pointer, but at the same time destroys the CVec - * without running its destructor. This can be used to pass memory back to - * C, but care must be taken that the ownership of underlying resources are - * handled correctly, i.e. that allocated memory is eventually freed - * if necessary. - */ - -use std::cast; -use std::ptr; -use std::raw; - -/** - * The type representing a foreign chunk of memory - */ -pub struct CVec<T> { - priv base: *mut T, - priv len: uint, - priv rsrc: DtorRes, -} - -struct DtorRes { - dtor: Option<proc()>, -} - -#[unsafe_destructor] -impl Drop for DtorRes { - fn drop(&mut self) { - let dtor = self.dtor.take(); - match dtor { - None => (), - Some(f) => f() - } - } -} - -impl DtorRes { - fn new(dtor: Option<proc()>) -> DtorRes { - DtorRes { - dtor: dtor, - } - } -} - -impl <T> CVec<T> { - /** - * Create a `CVec` from a raw pointer to a buffer with a given length. - * - * Fails if the given pointer is null. - * - * # Arguments - * - * * base - A raw pointer to a buffer - * * len - The number of elements in the buffer - */ - pub unsafe fn new(base: *mut T, len: uint) -> CVec<T> { - assert!(base != ptr::mut_null()); - CVec { - base: base, - len: len, - rsrc: DtorRes::new(None) - } - } - - /** - * Create a `CVec` from a foreign buffer, with a given length, - * and a function to run upon destruction. - * - * Fails if the given pointer is null. - * - * # Arguments - * - * * base - A foreign pointer to a buffer - * * len - The number of elements in the buffer - * * dtor - A proc to run when the value is destructed, useful - * for freeing the buffer, etc. - */ - pub unsafe fn new_with_dtor(base: *mut T, len: uint, dtor: proc()) -> CVec<T> { - assert!(base != ptr::mut_null()); - CVec { - base: base, - len: len, - rsrc: DtorRes::new(Some(dtor)) - } - } - - /// View the stored data as a slice. - pub fn as_slice<'a>(&'a self) -> &'a [T] { - unsafe { - cast::transmute(raw::Slice { data: self.base as *T, len: self.len }) - } - } - - /// View the stored data as a mutable slice. - pub fn as_mut_slice<'a>(&'a mut self) -> &'a mut [T] { - unsafe { - cast::transmute(raw::Slice { data: self.base as *T, len: self.len }) - } - } - - /** - * Retrieves an element at a given index - * - * Fails if `ofs` is greater or equal to the length of the vector - */ - pub fn get<'a>(&'a self, ofs: uint) -> &'a T { - assert!(ofs < self.len); - unsafe { - &*self.base.offset(ofs as int) - } - } - - /** - * Retrieves a mutable element at a given index - * - * Fails if `ofs` is greater or equal to the length of the vector - */ - pub fn get_mut<'a>(&'a mut self, ofs: uint) -> &'a mut T { - assert!(ofs < self.len); - unsafe { - &mut *self.base.offset(ofs as int) - } - } - - /** - * Unwrap the pointer without running the destructor - * - * This method retrieves the underlying pointer, and in the process - * destroys the CVec but without running the destructor. A use case - * would be transferring ownership of the buffer to a C function, as - * in this case you would not want to run the destructor. - * - * Note that if you want to access the underlying pointer without - * cancelling the destructor, you can simply call `transmute` on the return - * value of `get(0)`. - */ - pub unsafe fn unwrap(mut self) -> *mut T { - self.rsrc.dtor = None; - self.base - } -} - -impl <T> Container for CVec<T> { - /// Returns the length of the vector - fn len(&self) -> uint { self.len } -} - -#[cfg(test)] -mod tests { - use super::*; - - use std::libc::*; - use std::libc; - use std::ptr; - use std::rt::global_heap::malloc_raw; - - fn malloc(n: uint) -> CVec<u8> { - unsafe { - let mem = malloc_raw(n); - - CVec::new_with_dtor(mem as *mut u8, n, - proc() { libc::free(mem as *mut c_void); }) - } - } - - #[test] - fn test_basic() { - let mut cv = malloc(16); - - *cv.get_mut(3) = 8; - *cv.get_mut(4) = 9; - assert_eq!(*cv.get(3), 8); - assert_eq!(*cv.get(4), 9); - assert_eq!(cv.len(), 16); - } - - #[test] - #[should_fail] - fn test_fail_at_null() { - unsafe { - CVec::new(ptr::mut_null::<u8>(), 9); - } - } - - #[test] - #[should_fail] - fn test_overrun_get() { - let cv = malloc(16); - - cv.get(17); - } - - #[test] - #[should_fail] - fn test_overrun_set() { - let mut cv = malloc(16); - - *cv.get_mut(17) = 0; - } - - #[test] - fn test_unwrap() { - unsafe { - let cv = CVec::new_with_dtor(1 as *mut int, 0, - proc() { fail!("Don't run this destructor!") }); - let p = cv.unwrap(); - assert_eq!(p, 1 as *mut int); - } - } - -} diff --git a/src/libextra/lib.rs b/src/libextra/lib.rs deleted file mode 100644 index 4bb6b0050b8..00000000000 --- a/src/libextra/lib.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! - -Rust extras. - -The `extra` crate is a set of useful modules for a variety of -purposes, including collections, numerics, I/O, serialization, -and concurrency. - -Rust extras are part of the standard Rust distribution. - -*/ - -#[crate_id = "extra#0.10-pre"]; -#[comment = "Rust extras"]; -#[license = "MIT/ASL2"]; -#[crate_type = "rlib"]; -#[crate_type = "dylib"]; -#[doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", - html_favicon_url = "http://www.rust-lang.org/favicon.ico", - html_root_url = "http://static.rust-lang.org/doc/master")]; - -#[feature(macro_rules, globs, managed_boxes, asm, default_type_params)]; - -#[allow(deprecated_owned_vector)]; -#[deny(non_camel_case_types)]; -#[deny(missing_doc)]; - -extern crate collections; -extern crate rand; -extern crate serialize; -extern crate sync; -extern crate time; - -// Utility modules -pub mod c_vec; -pub mod url; -pub mod tempfile; -pub mod workcache; -pub mod stats; - -#[cfg(unicode)] -mod unicode; diff --git a/src/libextra/stats.rs b/src/libextra/stats.rs deleted file mode 100644 index d67e8f85ba9..00000000000 --- a/src/libextra/stats.rs +++ /dev/null @@ -1,1057 +0,0 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#[allow(missing_doc)]; - -use std::hash::Hash; -use std::io; -use std::mem; -use std::num; -use collections::hashmap; - -// NB: this can probably be rewritten in terms of num::Num -// to be less f64-specific. - -fn f64_cmp(x: f64, y: f64) -> Ordering { - // arbitrarily decide that NaNs are larger than everything. - if y.is_nan() { - Less - } else if x.is_nan() { - Greater - } else if x < y { - Less - } else if x == y { - Equal - } else { - Greater - } -} - -fn f64_sort(v: &mut [f64]) { - v.sort_by(|x: &f64, y: &f64| f64_cmp(*x, *y)); -} - -/// Trait that provides simple descriptive statistics on a univariate set of numeric samples. -pub trait Stats { - - /// Sum of the samples. - /// - /// Note: this method sacrifices performance at the altar of accuracy - /// Depends on IEEE-754 arithmetic guarantees. See proof of correctness at: - /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates"] - /// (http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps) - /// *Discrete & Computational Geometry 18*, 3 (Oct 1997), 305-363, Shewchuk J.R. - fn sum(self) -> f64; - - /// Minimum value of the samples. - fn min(self) -> f64; - - /// Maximum value of the samples. - fn max(self) -> f64; - - /// Arithmetic mean (average) of the samples: sum divided by sample-count. - /// - /// See: https://en.wikipedia.org/wiki/Arithmetic_mean - fn mean(self) -> f64; - - /// Median of the samples: value separating the lower half of the samples from the higher half. - /// Equal to `self.percentile(50.0)`. - /// - /// See: https://en.wikipedia.org/wiki/Median - fn median(self) -> f64; - - /// Variance of the samples: bias-corrected mean of the squares of the differences of each - /// sample from the sample mean. Note that this calculates the _sample variance_ rather than the - /// population variance, which is assumed to be unknown. It therefore corrects the `(n-1)/n` - /// bias that would appear if we calculated a population variance, by dividing by `(n-1)` rather - /// than `n`. - /// - /// See: https://en.wikipedia.org/wiki/Variance - fn var(self) -> f64; - - /// Standard deviation: the square root of the sample variance. - /// - /// Note: this is not a robust statistic for non-normal distributions. Prefer the - /// `median_abs_dev` for unknown distributions. - /// - /// See: https://en.wikipedia.org/wiki/Standard_deviation - fn std_dev(self) -> f64; - - /// Standard deviation as a percent of the mean value. See `std_dev` and `mean`. - /// - /// Note: this is not a robust statistic for non-normal distributions. Prefer the - /// `median_abs_dev_pct` for unknown distributions. - fn std_dev_pct(self) -> f64; - - /// Scaled median of the absolute deviations of each sample from the sample median. This is a - /// robust (distribution-agnostic) estimator of sample variability. Use this in preference to - /// `std_dev` if you cannot assume your sample is normally distributed. Note that this is scaled - /// by the constant `1.4826` to allow its use as a consistent estimator for the standard - /// deviation. - /// - /// See: http://en.wikipedia.org/wiki/Median_absolute_deviation - fn median_abs_dev(self) -> f64; - - /// Median absolute deviation as a percent of the median. See `median_abs_dev` and `median`. - fn median_abs_dev_pct(self) -> f64; - - /// Percentile: the value below which `pct` percent of the values in `self` fall. For example, - /// percentile(95.0) will return the value `v` such that 95% of the samples `s` in `self` - /// satisfy `s <= v`. - /// - /// Calculated by linear interpolation between closest ranks. - /// - /// See: http://en.wikipedia.org/wiki/Percentile - fn percentile(self, pct: f64) -> f64; - - /// Quartiles of the sample: three values that divide the sample into four equal groups, each - /// with 1/4 of the data. The middle value is the median. See `median` and `percentile`. This - /// function may calculate the 3 quartiles more efficiently than 3 calls to `percentile`, but - /// is otherwise equivalent. - /// - /// See also: https://en.wikipedia.org/wiki/Quartile - fn quartiles(self) -> (f64,f64,f64); - - /// Inter-quartile range: the difference between the 25th percentile (1st quartile) and the 75th - /// percentile (3rd quartile). See `quartiles`. - /// - /// See also: https://en.wikipedia.org/wiki/Interquartile_range - fn iqr(self) -> f64; -} - -/// Extracted collection of all the summary statistics of a sample set. -#[deriving(Clone, Eq)] -#[allow(missing_doc)] -pub struct Summary { - sum: f64, - min: f64, - max: f64, - mean: f64, - median: f64, - var: f64, - std_dev: f64, - std_dev_pct: f64, - median_abs_dev: f64, - median_abs_dev_pct: f64, - quartiles: (f64,f64,f64), - iqr: f64, -} - -impl Summary { - - /// Construct a new summary of a sample set. - pub fn new(samples: &[f64]) -> Summary { - Summary { - sum: samples.sum(), - min: samples.min(), - max: samples.max(), - mean: samples.mean(), - median: samples.median(), - var: samples.var(), - std_dev: samples.std_dev(), - std_dev_pct: samples.std_dev_pct(), - median_abs_dev: samples.median_abs_dev(), - median_abs_dev_pct: samples.median_abs_dev_pct(), - quartiles: samples.quartiles(), - iqr: samples.iqr() - } - } -} - -impl<'a> Stats for &'a [f64] { - - // FIXME #11059 handle NaN, inf and overflow - fn sum(self) -> f64 { - let mut partials : ~[f64] = ~[]; - - for &mut x in self.iter() { - let mut j = 0; - // This inner loop applies `hi`/`lo` summation to each - // partial so that the list of partial sums remains exact. - for i in range(0, partials.len()) { - let mut y = partials[i]; - if num::abs(x) < num::abs(y) { - mem::swap(&mut x, &mut y); - } - // Rounded `x+y` is stored in `hi` with round-off stored in - // `lo`. Together `hi+lo` are exactly equal to `x+y`. - let hi = x + y; - let lo = y - (hi - x); - if lo != 0f64 { - partials[j] = lo; - j += 1; - } - x = hi; - } - if j >= partials.len() { - partials.push(x); - } else { - partials[j] = x; - partials.truncate(j+1); - } - } - partials.iter().fold(0.0, |p, q| p + *q) - } - - fn min(self) -> f64 { - assert!(self.len() != 0); - self.iter().fold(self[0], |p, q| p.min(*q)) - } - - fn max(self) -> f64 { - assert!(self.len() != 0); - self.iter().fold(self[0], |p, q| p.max(*q)) - } - - fn mean(self) -> f64 { - assert!(self.len() != 0); - self.sum() / (self.len() as f64) - } - - fn median(self) -> f64 { - self.percentile(50.0) - } - - fn var(self) -> f64 { - if self.len() < 2 { - 0.0 - } else { - let mean = self.mean(); - let mut v = 0.0; - for s in self.iter() { - let x = *s - mean; - v += x*x; - } - // NB: this is _supposed to be_ len-1, not len. If you - // change it back to len, you will be calculating a - // population variance, not a sample variance. - v/((self.len()-1) as f64) - } - } - - fn std_dev(self) -> f64 { - self.var().sqrt() - } - - fn std_dev_pct(self) -> f64 { - (self.std_dev() / self.mean()) * 100.0 - } - - fn median_abs_dev(self) -> f64 { - let med = self.median(); - let abs_devs = self.map(|&v| num::abs(med - v)); - // This constant is derived by smarter statistics brains than me, but it is - // consistent with how R and other packages treat the MAD. - abs_devs.median() * 1.4826 - } - - fn median_abs_dev_pct(self) -> f64 { - (self.median_abs_dev() / self.median()) * 100.0 - } - - fn percentile(self, pct: f64) -> f64 { - let mut tmp = self.to_owned(); - f64_sort(tmp); - percentile_of_sorted(tmp, pct) - } - - fn quartiles(self) -> (f64,f64,f64) { - let mut tmp = self.to_owned(); - f64_sort(tmp); - let a = percentile_of_sorted(tmp, 25.0); - let b = percentile_of_sorted(tmp, 50.0); - let c = percentile_of_sorted(tmp, 75.0); - (a,b,c) - } - - fn iqr(self) -> f64 { - let (a,_,c) = self.quartiles(); - c - a - } -} - - -// Helper function: extract a value representing the `pct` percentile of a sorted sample-set, using -// linear interpolation. If samples are not sorted, return nonsensical value. -fn percentile_of_sorted(sorted_samples: &[f64], - pct: f64) -> f64 { - assert!(sorted_samples.len() != 0); - if sorted_samples.len() == 1 { - return sorted_samples[0]; - } - assert!(0.0 <= pct); - assert!(pct <= 100.0); - if pct == 100.0 { - return sorted_samples[sorted_samples.len() - 1]; - } - let rank = (pct / 100.0) * ((sorted_samples.len() - 1) as f64); - let lrank = rank.floor(); - let d = rank - lrank; - let n = lrank as uint; - let lo = sorted_samples[n]; - let hi = sorted_samples[n+1]; - lo + (hi - lo) * d -} - - -/// Winsorize a set of samples, replacing values above the `100-pct` percentile and below the `pct` -/// percentile with those percentiles themselves. This is a way of minimizing the effect of -/// outliers, at the cost of biasing the sample. It differs from trimming in that it does not -/// change the number of samples, just changes the values of those that are outliers. -/// -/// See: http://en.wikipedia.org/wiki/Winsorising -pub fn winsorize(samples: &mut [f64], pct: f64) { - let mut tmp = samples.to_owned(); - f64_sort(tmp); - let lo = percentile_of_sorted(tmp, pct); - let hi = percentile_of_sorted(tmp, 100.0-pct); - for samp in samples.mut_iter() { - if *samp > hi { - *samp = hi - } else if *samp < lo { - *samp = lo - } - } -} - -/// Render writes the min, max and quartiles of the provided `Summary` to the provided `Writer`. -pub fn write_5_number_summary(w: &mut io::Writer, - s: &Summary) -> io::IoResult<()> { - let (q1,q2,q3) = s.quartiles; - write!(w, "(min={}, q1={}, med={}, q3={}, max={})", - s.min, - q1, - q2, - q3, - s.max) -} - -/// Render a boxplot to the provided writer. The boxplot shows the min, max and quartiles of the -/// provided `Summary` (thus includes the mean) and is scaled to display within the range of the -/// nearest multiple-of-a-power-of-ten above and below the min and max of possible values, and -/// target `width_hint` characters of display (though it will be wider if necessary). -/// -/// As an example, the summary with 5-number-summary `(min=15, q1=17, med=20, q3=24, max=31)` might -/// display as: -/// -/// ~~~~ignore -/// 10 | [--****#******----------] | 40 -/// ~~~~ - -pub fn write_boxplot(w: &mut io::Writer, s: &Summary, - width_hint: uint) -> io::IoResult<()> { - - let (q1,q2,q3) = s.quartiles; - - // the .abs() handles the case where numbers are negative - let lomag = (10.0_f64).powf(&(s.min.abs().log10().floor())); - let himag = (10.0_f64).powf(&(s.max.abs().log10().floor())); - - // need to consider when the limit is zero - let lo = if lomag == 0.0 { - 0.0 - } else { - (s.min / lomag).floor() * lomag - }; - - let hi = if himag == 0.0 { - 0.0 - } else { - (s.max / himag).ceil() * himag - }; - - let range = hi - lo; - - let lostr = lo.to_str(); - let histr = hi.to_str(); - - let overhead_width = lostr.len() + histr.len() + 4; - let range_width = width_hint - overhead_width;; - let char_step = range / (range_width as f64); - - try!(write!(w, "{} |", lostr)); - - let mut c = 0; - let mut v = lo; - - while c < range_width && v < s.min { - try!(write!(w, " ")); - v += char_step; - c += 1; - } - try!(write!(w, "[")); - c += 1; - while c < range_width && v < q1 { - try!(write!(w, "-")); - v += char_step; - c += 1; - } - while c < range_width && v < q2 { - try!(write!(w, "*")); - v += char_step; - c += 1; - } - try!(write!(w, r"\#")); - c += 1; - while c < range_width && v < q3 { - try!(write!(w, "*")); - v += char_step; - c += 1; - } - while c < range_width && v < s.max { - try!(write!(w, "-")); - v += char_step; - c += 1; - } - try!(write!(w, "]")); - while c < range_width { - try!(write!(w, " ")); - v += char_step; - c += 1; - } - - try!(write!(w, "| {}", histr)); - Ok(()) -} - -/// Returns a HashMap with the number of occurrences of every element in the -/// sequence that the iterator exposes. -pub fn freq_count<T: Iterator<U>, U: Eq+Hash>(mut iter: T) -> hashmap::HashMap<U, uint> { - let mut map: hashmap::HashMap<U,uint> = hashmap::HashMap::new(); - for elem in iter { - map.insert_or_update_with(elem, 1, |_, count| *count += 1); - } - map -} - -// Test vectors generated from R, using the script src/etc/stat-test-vectors.r. - -#[cfg(test)] -mod tests { - use stats::Stats; - use stats::Summary; - use stats::write_5_number_summary; - use stats::write_boxplot; - use std::io; - use std::str; - use std::f64; - - macro_rules! assert_approx_eq( - ($a:expr, $b:expr) => ({ - let (a, b) = (&$a, &$b); - assert!((*a - *b).abs() < 1.0e-6, - "{} is not approximately equal to {}", *a, *b); - }) - ) - - fn check(samples: &[f64], summ: &Summary) { - - let summ2 = Summary::new(samples); - - let mut w = io::stdout(); - let w = &mut w as &mut io::Writer; - (write!(w, "\n")).unwrap(); - write_5_number_summary(w, &summ2).unwrap(); - (write!(w, "\n")).unwrap(); - write_boxplot(w, &summ2, 50).unwrap(); - (write!(w, "\n")).unwrap(); - - assert_eq!(summ.sum, summ2.sum); - assert_eq!(summ.min, summ2.min); - assert_eq!(summ.max, summ2.max); - assert_eq!(summ.mean, summ2.mean); - assert_eq!(summ.median, summ2.median); - - // We needed a few more digits to get exact equality on these - // but they're within float epsilon, which is 1.0e-6. - assert_approx_eq!(summ.var, summ2.var); - assert_approx_eq!(summ.std_dev, summ2.std_dev); - assert_approx_eq!(summ.std_dev_pct, summ2.std_dev_pct); - assert_approx_eq!(summ.median_abs_dev, summ2.median_abs_dev); - assert_approx_eq!(summ.median_abs_dev_pct, summ2.median_abs_dev_pct); - - assert_eq!(summ.quartiles, summ2.quartiles); - assert_eq!(summ.iqr, summ2.iqr); - } - - #[test] - fn test_min_max_nan() { - let xs = &[1.0, 2.0, f64::NAN, 3.0, 4.0]; - let summary = Summary::new(xs); - assert_eq!(summary.min, 1.0); - assert_eq!(summary.max, 4.0); - } - - #[test] - fn test_norm2() { - let val = &[ - 958.0000000000, - 924.0000000000, - ]; - let summ = &Summary { - sum: 1882.0000000000, - min: 924.0000000000, - max: 958.0000000000, - mean: 941.0000000000, - median: 941.0000000000, - var: 578.0000000000, - std_dev: 24.0416305603, - std_dev_pct: 2.5549022912, - median_abs_dev: 25.2042000000, - median_abs_dev_pct: 2.6784484591, - quartiles: (932.5000000000,941.0000000000,949.5000000000), - iqr: 17.0000000000, - }; - check(val, summ); - } - #[test] - fn test_norm10narrow() { - let val = &[ - 966.0000000000, - 985.0000000000, - 1110.0000000000, - 848.0000000000, - 821.0000000000, - 975.0000000000, - 962.0000000000, - 1157.0000000000, - 1217.0000000000, - 955.0000000000, - ]; - let summ = &Summary { - sum: 9996.0000000000, - min: 821.0000000000, - max: 1217.0000000000, - mean: 999.6000000000, - median: 970.5000000000, - var: 16050.7111111111, - std_dev: 126.6914010938, - std_dev_pct: 12.6742097933, - median_abs_dev: 102.2994000000, - median_abs_dev_pct: 10.5408964451, - quartiles: (956.7500000000,970.5000000000,1078.7500000000), - iqr: 122.0000000000, - }; - check(val, summ); - } - #[test] - fn test_norm10medium() { - let val = &[ - 954.0000000000, - 1064.0000000000, - 855.0000000000, - 1000.0000000000, - 743.0000000000, - 1084.0000000000, - 704.0000000000, - 1023.0000000000, - 357.0000000000, - 869.0000000000, - ]; - let summ = &Summary { - sum: 8653.0000000000, - min: 357.0000000000, - max: 1084.0000000000, - mean: 865.3000000000, - median: 911.5000000000, - var: 48628.4555555556, - std_dev: 220.5186059170, - std_dev_pct: 25.4846418487, - median_abs_dev: 195.7032000000, - median_abs_dev_pct: 21.4704552935, - quartiles: (771.0000000000,911.5000000000,1017.2500000000), - iqr: 246.2500000000, - }; - check(val, summ); - } - #[test] - fn test_norm10wide() { - let val = &[ - 505.0000000000, - 497.0000000000, - 1591.0000000000, - 887.0000000000, - 1026.0000000000, - 136.0000000000, - 1580.0000000000, - 940.0000000000, - 754.0000000000, - 1433.0000000000, - ]; - let summ = &Summary { - sum: 9349.0000000000, - min: 136.0000000000, - max: 1591.0000000000, - mean: 934.9000000000, - median: 913.5000000000, - var: 239208.9888888889, - std_dev: 489.0899599142, - std_dev_pct: 52.3146817750, - median_abs_dev: 611.5725000000, - median_abs_dev_pct: 66.9482758621, - quartiles: (567.2500000000,913.5000000000,1331.2500000000), - iqr: 764.0000000000, - }; - check(val, summ); - } - #[test] - fn test_norm25verynarrow() { - let val = &[ - 991.0000000000, - 1018.0000000000, - 998.0000000000, - 1013.0000000000, - 974.0000000000, - 1007.0000000000, - 1014.0000000000, - 999.0000000000, - 1011.0000000000, - 978.0000000000, - 985.0000000000, - 999.0000000000, - 983.0000000000, - 982.0000000000, - 1015.0000000000, - 1002.0000000000, - 977.0000000000, - 948.0000000000, - 1040.0000000000, - 974.0000000000, - 996.0000000000, - 989.0000000000, - 1015.0000000000, - 994.0000000000, - 1024.0000000000, - ]; - let summ = &Summary { - sum: 24926.0000000000, - min: 948.0000000000, - max: 1040.0000000000, - mean: 997.0400000000, - median: 998.0000000000, - var: 393.2066666667, - std_dev: 19.8294393937, - std_dev_pct: 1.9888308788, - median_abs_dev: 22.2390000000, - median_abs_dev_pct: 2.2283567134, - quartiles: (983.0000000000,998.0000000000,1013.0000000000), - iqr: 30.0000000000, - }; - check(val, summ); - } - #[test] - fn test_exp10a() { - let val = &[ - 23.0000000000, - 11.0000000000, - 2.0000000000, - 57.0000000000, - 4.0000000000, - 12.0000000000, - 5.0000000000, - 29.0000000000, - 3.0000000000, - 21.0000000000, - ]; - let summ = &Summary { - sum: 167.0000000000, - min: 2.0000000000, - max: 57.0000000000, - mean: 16.7000000000, - median: 11.5000000000, - var: 287.7888888889, - std_dev: 16.9643416875, - std_dev_pct: 101.5828843560, - median_abs_dev: 13.3434000000, - median_abs_dev_pct: 116.0295652174, - quartiles: (4.2500000000,11.5000000000,22.5000000000), - iqr: 18.2500000000, - }; - check(val, summ); - } - #[test] - fn test_exp10b() { - let val = &[ - 24.0000000000, - 17.0000000000, - 6.0000000000, - 38.0000000000, - 25.0000000000, - 7.0000000000, - 51.0000000000, - 2.0000000000, - 61.0000000000, - 32.0000000000, - ]; - let summ = &Summary { - sum: 263.0000000000, - min: 2.0000000000, - max: 61.0000000000, - mean: 26.3000000000, - median: 24.5000000000, - var: 383.5666666667, - std_dev: 19.5848580967, - std_dev_pct: 74.4671410520, - median_abs_dev: 22.9803000000, - median_abs_dev_pct: 93.7971428571, - quartiles: (9.5000000000,24.5000000000,36.5000000000), - iqr: 27.0000000000, - }; - check(val, summ); - } - #[test] - fn test_exp10c() { - let val = &[ - 71.0000000000, - 2.0000000000, - 32.0000000000, - 1.0000000000, - 6.0000000000, - 28.0000000000, - 13.0000000000, - 37.0000000000, - 16.0000000000, - 36.0000000000, - ]; - let summ = &Summary { - sum: 242.0000000000, - min: 1.0000000000, - max: 71.0000000000, - mean: 24.2000000000, - median: 22.0000000000, - var: 458.1777777778, - std_dev: 21.4050876611, - std_dev_pct: 88.4507754589, - median_abs_dev: 21.4977000000, - median_abs_dev_pct: 97.7168181818, - quartiles: (7.7500000000,22.0000000000,35.0000000000), - iqr: 27.2500000000, - }; - check(val, summ); - } - #[test] - fn test_exp25() { - let val = &[ - 3.0000000000, - 24.0000000000, - 1.0000000000, - 19.0000000000, - 7.0000000000, - 5.0000000000, - 30.0000000000, - 39.0000000000, - 31.0000000000, - 13.0000000000, - 25.0000000000, - 48.0000000000, - 1.0000000000, - 6.0000000000, - 42.0000000000, - 63.0000000000, - 2.0000000000, - 12.0000000000, - 108.0000000000, - 26.0000000000, - 1.0000000000, - 7.0000000000, - 44.0000000000, - 25.0000000000, - 11.0000000000, - ]; - let summ = &Summary { - sum: 593.0000000000, - min: 1.0000000000, - max: 108.0000000000, - mean: 23.7200000000, - median: 19.0000000000, - var: 601.0433333333, - std_dev: 24.5161851301, - std_dev_pct: 103.3565983562, - median_abs_dev: 19.2738000000, - median_abs_dev_pct: 101.4410526316, - quartiles: (6.0000000000,19.0000000000,31.0000000000), - iqr: 25.0000000000, - }; - check(val, summ); - } - #[test] - fn test_binom25() { - let val = &[ - 18.0000000000, - 17.0000000000, - 27.0000000000, - 15.0000000000, - 21.0000000000, - 25.0000000000, - 17.0000000000, - 24.0000000000, - 25.0000000000, - 24.0000000000, - 26.0000000000, - 26.0000000000, - 23.0000000000, - 15.0000000000, - 23.0000000000, - 17.0000000000, - 18.0000000000, - 18.0000000000, - 21.0000000000, - 16.0000000000, - 15.0000000000, - 31.0000000000, - 20.0000000000, - 17.0000000000, - 15.0000000000, - ]; - let summ = &Summary { - sum: 514.0000000000, - min: 15.0000000000, - max: 31.0000000000, - mean: 20.5600000000, - median: 20.0000000000, - var: 20.8400000000, - std_dev: 4.5650848842, - std_dev_pct: 22.2037202539, - median_abs_dev: 5.9304000000, - median_abs_dev_pct: 29.6520000000, - quartiles: (17.0000000000,20.0000000000,24.0000000000), - iqr: 7.0000000000, - }; - check(val, summ); - } - #[test] - fn test_pois25lambda30() { - let val = &[ - 27.0000000000, - 33.0000000000, - 34.0000000000, - 34.0000000000, - 24.0000000000, - 39.0000000000, - 28.0000000000, - 27.0000000000, - 31.0000000000, - 28.0000000000, - 38.0000000000, - 21.0000000000, - 33.0000000000, - 36.0000000000, - 29.0000000000, - 37.0000000000, - 32.0000000000, - 34.0000000000, - 31.0000000000, - 39.0000000000, - 25.0000000000, - 31.0000000000, - 32.0000000000, - 40.0000000000, - 24.0000000000, - ]; - let summ = &Summary { - sum: 787.0000000000, - min: 21.0000000000, - max: 40.0000000000, - mean: 31.4800000000, - median: 32.0000000000, - var: 26.5933333333, - std_dev: 5.1568724372, - std_dev_pct: 16.3814245145, - median_abs_dev: 5.9304000000, - median_abs_dev_pct: 18.5325000000, - quartiles: (28.0000000000,32.0000000000,34.0000000000), - iqr: 6.0000000000, - }; - check(val, summ); - } - #[test] - fn test_pois25lambda40() { - let val = &[ - 42.0000000000, - 50.0000000000, - 42.0000000000, - 46.0000000000, - 34.0000000000, - 45.0000000000, - 34.0000000000, - 49.0000000000, - 39.0000000000, - 28.0000000000, - 40.0000000000, - 35.0000000000, - 37.0000000000, - 39.0000000000, - 46.0000000000, - 44.0000000000, - 32.0000000000, - 45.0000000000, - 42.0000000000, - 37.0000000000, - 48.0000000000, - 42.0000000000, - 33.0000000000, - 42.0000000000, - 48.0000000000, - ]; - let summ = &Summary { - sum: 1019.0000000000, - min: 28.0000000000, - max: 50.0000000000, - mean: 40.7600000000, - median: 42.0000000000, - var: 34.4400000000, - std_dev: 5.8685603004, - std_dev_pct: 14.3978417577, - median_abs_dev: 5.9304000000, - median_abs_dev_pct: 14.1200000000, - quartiles: (37.0000000000,42.0000000000,45.0000000000), - iqr: 8.0000000000, - }; - check(val, summ); - } - #[test] - fn test_pois25lambda50() { - let val = &[ - 45.0000000000, - 43.0000000000, - 44.0000000000, - 61.0000000000, - 51.0000000000, - 53.0000000000, - 59.0000000000, - 52.0000000000, - 49.0000000000, - 51.0000000000, - 51.0000000000, - 50.0000000000, - 49.0000000000, - 56.0000000000, - 42.0000000000, - 52.0000000000, - 51.0000000000, - 43.0000000000, - 48.0000000000, - 48.0000000000, - 50.0000000000, - 42.0000000000, - 43.0000000000, - 42.0000000000, - 60.0000000000, - ]; - let summ = &Summary { - sum: 1235.0000000000, - min: 42.0000000000, - max: 61.0000000000, - mean: 49.4000000000, - median: 50.0000000000, - var: 31.6666666667, - std_dev: 5.6273143387, - std_dev_pct: 11.3913245723, - median_abs_dev: 4.4478000000, - median_abs_dev_pct: 8.8956000000, - quartiles: (44.0000000000,50.0000000000,52.0000000000), - iqr: 8.0000000000, - }; - check(val, summ); - } - #[test] - fn test_unif25() { - let val = &[ - 99.0000000000, - 55.0000000000, - 92.0000000000, - 79.0000000000, - 14.0000000000, - 2.0000000000, - 33.0000000000, - 49.0000000000, - 3.0000000000, - 32.0000000000, - 84.0000000000, - 59.0000000000, - 22.0000000000, - 86.0000000000, - 76.0000000000, - 31.0000000000, - 29.0000000000, - 11.0000000000, - 41.0000000000, - 53.0000000000, - 45.0000000000, - 44.0000000000, - 98.0000000000, - 98.0000000000, - 7.0000000000, - ]; - let summ = &Summary { - sum: 1242.0000000000, - min: 2.0000000000, - max: 99.0000000000, - mean: 49.6800000000, - median: 45.0000000000, - var: 1015.6433333333, - std_dev: 31.8691595957, - std_dev_pct: 64.1488719719, - median_abs_dev: 45.9606000000, - median_abs_dev_pct: 102.1346666667, - quartiles: (29.0000000000,45.0000000000,79.0000000000), - iqr: 50.0000000000, - }; - check(val, summ); - } - - #[test] - fn test_boxplot_nonpositive() { - fn t(s: &Summary, expected: ~str) { - use std::io::MemWriter; - let mut m = MemWriter::new(); - write_boxplot(&mut m as &mut io::Writer, s, 30).unwrap(); - let out = str::from_utf8_owned(m.unwrap()).unwrap(); - assert_eq!(out, expected); - } - - t(&Summary::new([-2.0, -1.0]), ~"-2 |[------******#*****---]| -1"); - t(&Summary::new([0.0, 2.0]), ~"0 |[-------*****#*******---]| 2"); - t(&Summary::new([-2.0, 0.0]), ~"-2 |[------******#******---]| 0"); - - } - #[test] - fn test_sum_f64s() { - assert_eq!([0.5, 3.2321, 1.5678].sum(), 5.2999); - } - #[test] - fn test_sum_f64_between_ints_that_sum_to_0() { - assert_eq!([1e30, 1.2, -1e30].sum(), 1.2); - } -} - -#[cfg(test)] -mod bench { - extern crate test; - use self::test::BenchHarness; - use std::vec; - use stats::Stats; - - #[bench] - fn sum_three_items(bh: &mut BenchHarness) { - bh.iter(|| { - [1e20, 1.5, -1e20].sum(); - }) - } - #[bench] - fn sum_many_f64(bh: &mut BenchHarness) { - let nums = [-1e30, 1e60, 1e30, 1.0, -1e60]; - let v = vec::from_fn(500, |i| nums[i%5]); - - bh.iter(|| { - v.sum(); - }) - } -} diff --git a/src/libextra/tempfile.rs b/src/libextra/tempfile.rs deleted file mode 100644 index 905541604e0..00000000000 --- a/src/libextra/tempfile.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Temporary files and directories - - -use std::os; -use rand::{task_rng, Rng}; -use std::io; -use std::io::fs; - -/// A wrapper for a path to temporary directory implementing automatic -/// scope-based deletion. -pub struct TempDir { - priv path: Option<Path> -} - -impl TempDir { - /// Attempts to make a temporary directory inside of `tmpdir` whose name - /// will have the suffix `suffix`. The directory will be automatically - /// deleted once the returned wrapper is destroyed. - /// - /// If no directory can be created, None is returned. - pub fn new_in(tmpdir: &Path, suffix: &str) -> Option<TempDir> { - if !tmpdir.is_absolute() { - let abs_tmpdir = os::make_absolute(tmpdir); - return TempDir::new_in(&abs_tmpdir, suffix); - } - - let mut r = task_rng(); - for _ in range(0u, 1000) { - let p = tmpdir.join(r.gen_ascii_str(16) + suffix); - match fs::mkdir(&p, io::UserRWX) { - Err(..) => {} - Ok(()) => return Some(TempDir { path: Some(p) }) - } - } - None - } - - /// Attempts to make a temporary directory inside of `os::tmpdir()` whose - /// name will have the suffix `suffix`. The directory will be automatically - /// deleted once the returned wrapper is destroyed. - /// - /// If no directory can be created, None is returned. - pub fn new(suffix: &str) -> Option<TempDir> { - TempDir::new_in(&os::tmpdir(), suffix) - } - - /// Unwrap the wrapped `std::path::Path` from the `TempDir` wrapper. - /// This discards the wrapper so that the automatic deletion of the - /// temporary directory is prevented. - pub fn unwrap(self) -> Path { - let mut tmpdir = self; - tmpdir.path.take_unwrap() - } - - /// Access the wrapped `std::path::Path` to the temporary directory. - pub fn path<'a>(&'a self) -> &'a Path { - self.path.get_ref() - } -} - -impl Drop for TempDir { - fn drop(&mut self) { - for path in self.path.iter() { - if path.exists() { - // FIXME: is failing the right thing to do? - fs::rmdir_recursive(path).unwrap(); - } - } - } -} - -// the tests for this module need to change the path using change_dir, -// and this doesn't play nicely with other tests so these unit tests are located -// in src/test/run-pass/tempfile.rs diff --git a/src/libextra/unicode.rs b/src/libextra/unicode.rs deleted file mode 100644 index 094a4b02a24..00000000000 --- a/src/libextra/unicode.rs +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#[allow(missing_doc)]; - -pub mod icu { - pub type UBool = u8; - pub type UProperty = int; - pub type UChar32 = char; - - pub static TRUE : u8 = 1u8; - pub static FALSE : u8 = 0u8; - - pub static UCHAR_ALPHABETIC : UProperty = 0; - pub static UCHAR_BINARY_START : UProperty = 0; // = UCHAR_ALPHABETIC - pub static UCHAR_ASCII_HEX_DIGIT : UProperty = 1; - pub static UCHAR_BIDI_CONTROL : UProperty = 2; - - pub static UCHAR_BIDI_MIRRORED : UProperty = 3; - pub static UCHAR_DASH : UProperty = 4; - pub static UCHAR_DEFAULT_IGNORABLE_CODE_POINT : UProperty = 5; - pub static UCHAR_DEPRECATED : UProperty = 6; - - pub static UCHAR_DIACRITIC : UProperty = 7; - pub static UCHAR_EXTENDER : UProperty = 8; - pub static UCHAR_FULL_COMPOSITION_EXCLUSION : UProperty = 9; - pub static UCHAR_GRAPHEME_BASE : UProperty = 10; - - pub static UCHAR_GRAPHEME_EXTEND : UProperty = 11; - pub static UCHAR_GRAPHEME_LINK : UProperty = 12; - pub static UCHAR_HEX_DIGIT : UProperty = 13; - pub static UCHAR_HYPHEN : UProperty = 14; - - pub static UCHAR_ID_CONTINUE : UProperty = 15; - pub static UCHAR_ID_START : UProperty = 16; - pub static UCHAR_IDEOGRAPHIC : UProperty = 17; - pub static UCHAR_IDS_BINARY_OPERATOR : UProperty = 18; - - pub static UCHAR_IDS_TRINARY_OPERATOR : UProperty = 19; - pub static UCHAR_JOIN_CONTROL : UProperty = 20; - pub static UCHAR_LOGICAL_ORDER_EXCEPTION : UProperty = 21; - pub static UCHAR_LOWERCASE : UProperty = 22; - - pub static UCHAR_MATH : UProperty = 23; - pub static UCHAR_NONCHARACTER_CODE_POINT : UProperty = 24; - pub static UCHAR_QUOTATION_MARK : UProperty = 25; - pub static UCHAR_RADICAL : UProperty = 26; - - pub static UCHAR_SOFT_DOTTED : UProperty = 27; - pub static UCHAR_TERMINAL_PUNCTUATION : UProperty = 28; - pub static UCHAR_UNIFIED_IDEOGRAPH : UProperty = 29; - pub static UCHAR_UPPERCASE : UProperty = 30; - - pub static UCHAR_WHITE_SPACE : UProperty = 31; - pub static UCHAR_XID_CONTINUE : UProperty = 32; - pub static UCHAR_XID_START : UProperty = 33; - pub static UCHAR_CASE_SENSITIVE : UProperty = 34; - - pub static UCHAR_S_TERM : UProperty = 35; - pub static UCHAR_VARIATION_SELECTOR : UProperty = 36; - pub static UCHAR_NFD_INERT : UProperty = 37; - pub static UCHAR_NFKD_INERT : UProperty = 38; - - pub static UCHAR_NFC_INERT : UProperty = 39; - pub static UCHAR_NFKC_INERT : UProperty = 40; - pub static UCHAR_SEGMENT_STARTER : UProperty = 41; - pub static UCHAR_PATTERN_SYNTAX : UProperty = 42; - - pub static UCHAR_PATTERN_WHITE_SPACE : UProperty = 43; - pub static UCHAR_POSIX_ALNUM : UProperty = 44; - pub static UCHAR_POSIX_BLANK : UProperty = 45; - pub static UCHAR_POSIX_GRAPH : UProperty = 46; - - pub static UCHAR_POSIX_PRINT : UProperty = 47; - pub static UCHAR_POSIX_XDIGIT : UProperty = 48; - pub static UCHAR_CASED : UProperty = 49; - pub static UCHAR_CASE_IGNORABLE : UProperty = 50; - - pub static UCHAR_CHANGES_WHEN_LOWERCASED : UProperty = 51; - pub static UCHAR_CHANGES_WHEN_UPPERCASED : UProperty = 52; - pub static UCHAR_CHANGES_WHEN_TITLECASED : UProperty = 53; - pub static UCHAR_CHANGES_WHEN_CASEFOLDED : UProperty = 54; - - pub static UCHAR_CHANGES_WHEN_CASEMAPPED : UProperty = 55; - pub static UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED : UProperty = 56; - pub static UCHAR_BINARY_LIMIT : UProperty = 57; - pub static UCHAR_BIDI_CLASS : UProperty = 0x1000; - - pub static UCHAR_INT_START : UProperty = 0x1000; // UCHAR_BIDI_CLASS - pub static UCHAR_BLOCK : UProperty = 0x1001; - pub static UCHAR_CANONICAL_COMBINING_CLASS : UProperty = 0x1002; - pub static UCHAR_DECOMPOSITION_TYPE : UProperty = 0x1003; - - pub static UCHAR_EAST_ASIAN_WIDTH : UProperty = 0x1004; - pub static UCHAR_GENERAL_CATEGORY : UProperty = 0x1005; - pub static UCHAR_JOINING_GROUP : UProperty = 0x1006; - pub static UCHAR_JOINING_TYPE : UProperty = 0x1007; - - pub static UCHAR_LINE_BREAK : UProperty = 0x1008; - pub static UCHAR_NUMERIC_TYPE : UProperty = 0x1009; - pub static UCHAR_SCRIPT : UProperty = 0x100A; - pub static UCHAR_HANGUL_SYLLABLE_TYPE : UProperty = 0x100B; - - pub static UCHAR_NFD_QUICK_CHECK : UProperty = 0x100C; - pub static UCHAR_NFKD_QUICK_CHECK : UProperty = 0x100D; - pub static UCHAR_NFC_QUICK_CHECK : UProperty = 0x100E; - pub static UCHAR_NFKC_QUICK_CHECK : UProperty = 0x100F; - - pub static UCHAR_LEAD_CANONICAL_COMBINING_CLASS : UProperty = 0x1010; - pub static UCHAR_TRAIL_CANONICAL_COMBINING_CLASS : UProperty = 0x1011; - pub static UCHAR_GRAPHEME_CLUSTER_BREAK : UProperty = 0x1012; - pub static UCHAR_SENTENCE_BREAK : UProperty = 0x1013; - - pub static UCHAR_WORD_BREAK : UProperty = 0x1014; - pub static UCHAR_INT_LIMIT : UProperty = 0x1015; - - pub static UCHAR_GENERAL_CATEGORY_MASK : UProperty = 0x2000; - pub static UCHAR_MASK_START : UProperty = 0x2000; - // = UCHAR_GENERAL_CATEGORY_MASK - pub static UCHAR_MASK_LIMIT : UProperty = 0x2001; - - pub static UCHAR_NUMERIC_VALUE : UProperty = 0x3000; - pub static UCHAR_DOUBLE_START : UProperty = 0x3000; - // = UCHAR_NUMERIC_VALUE - pub static UCHAR_DOUBLE_LIMIT : UProperty = 0x3001; - - pub static UCHAR_AGE : UProperty = 0x4000; - pub static UCHAR_STRING_START : UProperty = 0x4000; // = UCHAR_AGE - pub static UCHAR_BIDI_MIRRORING_GLYPH : UProperty = 0x4001; - pub static UCHAR_CASE_FOLDING : UProperty = 0x4002; - - pub static UCHAR_ISO_COMMENT : UProperty = 0x4003; - pub static UCHAR_LOWERCASE_MAPPING : UProperty = 0x4004; - pub static UCHAR_NAME : UProperty = 0x4005; - pub static UCHAR_SIMPLE_CASE_FOLDING : UProperty = 0x4006; - - pub static UCHAR_SIMPLE_LOWERCASE_MAPPING : UProperty = 0x4007; - pub static UCHAR_SIMPLE_TITLECASE_MAPPING : UProperty = 0x4008; - pub static UCHAR_SIMPLE_UPPERCASE_MAPPING : UProperty = 0x4009; - pub static UCHAR_TITLECASE_MAPPING : UProperty = 0x400A; - - pub static UCHAR_UNICODE_1_NAME : UProperty = 0x400B; - pub static UCHAR_UPPERCASE_MAPPING : UProperty = 0x400C; - pub static UCHAR_STRING_LIMIT : UProperty = 0x400D; - - pub static UCHAR_SCRIPT_EXTENSIONS : UProperty = 0x7000; - pub static UCHAR_OTHER_PROPERTY_START : UProperty = 0x7000; - // = UCHAR_SCRIPT_EXTENSIONS; - pub static UCHAR_OTHER_PROPERTY_LIMIT : UProperty = 0x7001; - - pub static UCHAR_INVALID_CODE : UProperty = -1; - - pub mod libicu { - use unicode::icu::*; - - // #[link_name = "icuuc"] - #[link(name = "icuuc")] - extern { - pub fn u_hasBinaryProperty(c: UChar32, which: UProperty) -> UBool; - pub fn u_isdigit(c: UChar32) -> UBool; - pub fn u_islower(c: UChar32) -> UBool; - pub fn u_isspace(c: UChar32) -> UBool; - pub fn u_isupper(c: UChar32) -> UBool; - pub fn u_tolower(c: UChar32) -> UChar32; - pub fn u_toupper(c: UChar32) -> UChar32; - } - } -} - -pub fn is_XID_start(c: char) -> bool { - unsafe { - return icu::libicu::u_hasBinaryProperty(c, icu::UCHAR_XID_START) - == icu::TRUE; - } -} - -pub fn is_XID_continue(c: char) -> bool { - unsafe { - return icu::libicu::u_hasBinaryProperty(c, icu::UCHAR_XID_START) - == icu::TRUE; - } -} - -/* -Function: is_digit - -Returns true if a character is a digit. -*/ -pub fn is_digit(c: char) -> bool { - unsafe { - return icu::libicu::u_isdigit(c) == icu::TRUE; - } -} - -/* -Function: is_lower - -Returns true if a character is a lowercase letter. -*/ -pub fn is_lower(c: char) -> bool { - unsafe { - return icu::libicu::u_islower(c) == icu::TRUE; - } -} - -/* -Function: is_space - -Returns true if a character is space. -*/ -pub fn is_space(c: char) -> bool { - unsafe { - return icu::libicu::u_isspace(c) == icu::TRUE; - } -} - -/* -Function: is_upper - -Returns true if a character is an uppercase letter. -*/ -pub fn is_upper(c: char) -> bool { - unsafe { - return icu::libicu::u_isupper(c) == icu::TRUE; - } -} - -#[cfg(test)] -mod tests { - use unicode::*; - - #[test] - fn test_is_digit() { - assert!((is_digit('0'))); - assert!((!is_digit('m'))); - } - - #[test] - fn test_is_lower() { - assert!((is_lower('m'))); - assert!((!is_lower('M'))); - } - - #[test] - fn test_is_space() { - assert!((is_space(' '))); - assert!((!is_space('m'))); - } - - #[test] - fn test_is_upper() { - assert!((is_upper('M'))); - assert!((!is_upper('m'))); - } -} diff --git a/src/libextra/url.rs b/src/libextra/url.rs deleted file mode 100644 index 6be90c0056d..00000000000 --- a/src/libextra/url.rs +++ /dev/null @@ -1,1277 +0,0 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Types/fns concerning URLs (see RFC 3986) - -#[allow(missing_doc)]; - -use std::cmp::Eq; -use std::fmt; -use std::hash::Hash; -use std::io::BufReader; -use std::from_str::FromStr; -use std::uint; - -use collections::HashMap; - -/// A Uniform Resource Locator (URL). A URL is a form of URI (Uniform Resource -/// Identifier) that includes network location information, such as hostname or -/// port number. -/// -/// # Example -/// -/// ```rust -/// use extra::url::{Url, UserInfo}; -/// -/// let url = Url { scheme: ~"https", -/// user: Some(UserInfo { user: ~"username", pass: None }), -/// host: ~"example.com", -/// port: Some(~"8080"), -/// path: ~"/foo/bar", -/// query: ~[(~"baz", ~"qux")], -/// fragment: Some(~"quz") }; -/// // https://username@example.com:8080/foo/bar?baz=qux#quz -/// ``` -#[deriving(Clone, Eq)] -pub struct Url { - /// The scheme part of a URL, such as `https` in the above example. - scheme: ~str, - /// A URL subcomponent for user authentication. `username` in the above example. - user: Option<UserInfo>, - /// A domain name or IP address. For example, `example.com`. - host: ~str, - /// A TCP port number, for example `8080`. - port: Option<~str>, - /// The path component of a URL, for example `/foo/bar`. - path: ~str, - /// The query component of a URL. `~[(~"baz", ~"qux")]` represents the - /// fragment `baz=qux` in the above example. - query: Query, - /// The fragment component, such as `quz`. Doesn't include the leading `#` character. - fragment: Option<~str> -} - -#[deriving(Clone, Eq)] -pub struct Path { - /// The path component of a URL, for example `/foo/bar`. - path: ~str, - /// The query component of a URL. `~[(~"baz", ~"qux")]` represents the - /// fragment `baz=qux` in the above example. - query: Query, - /// The fragment component, such as `quz`. Doesn't include the leading `#` character. - fragment: Option<~str> -} - -/// An optional subcomponent of a URI authority component. -#[deriving(Clone, Eq)] -pub struct UserInfo { - /// The user name. - user: ~str, - /// Password or other scheme-specific authentication information. - pass: Option<~str> -} - -/// Represents the query component of a URI. -pub type Query = ~[(~str, ~str)]; - -impl Url { - pub fn new(scheme: ~str, - user: Option<UserInfo>, - host: ~str, - port: Option<~str>, - path: ~str, - query: Query, - fragment: Option<~str>) - -> Url { - Url { - scheme: scheme, - user: user, - host: host, - port: port, - path: path, - query: query, - fragment: fragment, - } - } -} - -impl Path { - pub fn new(path: ~str, - query: Query, - fragment: Option<~str>) - -> Path { - Path { - path: path, - query: query, - fragment: fragment, - } - } -} - -impl UserInfo { - #[inline] - pub fn new(user: ~str, pass: Option<~str>) -> UserInfo { - UserInfo { user: user, pass: pass } - } -} - -fn encode_inner(s: &str, full_url: bool) -> ~str { - let mut rdr = BufReader::new(s.as_bytes()); - let mut out = ~""; - - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Err(..) => break, - Ok(..) => buf[0] as char, - }; - - match ch { - // unreserved: - 'A' .. 'Z' | - 'a' .. 'z' | - '0' .. '9' | - '-' | '.' | '_' | '~' => { - out.push_char(ch); - } - _ => { - if full_url { - match ch { - // gen-delims: - ':' | '/' | '?' | '#' | '[' | ']' | '@' | - - // sub-delims: - '!' | '$' | '&' | '"' | '(' | ')' | '*' | - '+' | ',' | ';' | '=' => { - out.push_char(ch); - } - - _ => out.push_str(format!("%{:X}", ch as uint)) - } - } else { - out.push_str(format!("%{:X}", ch as uint)); - } - } - } - } - - out -} - -/** - * Encodes a URI by replacing reserved characters with percent-encoded - * character sequences. - * - * This function is compliant with RFC 3986. - * - * # Example - * - * ```rust - * use extra::url::encode; - * - * let url = encode(&"https://example.com/Rust (programming language)"); - * println!("{}", url); // https://example.com/Rust%20(programming%20language) - * ``` - */ -pub fn encode(s: &str) -> ~str { - encode_inner(s, true) -} - -/** - * Encodes a URI component by replacing reserved characters with percent - * encoded character sequences. - * - * This function is compliant with RFC 3986. - */ - -pub fn encode_component(s: &str) -> ~str { - encode_inner(s, false) -} - -fn decode_inner(s: &str, full_url: bool) -> ~str { - let mut rdr = BufReader::new(s.as_bytes()); - let mut out = ~""; - - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Err(..) => break, - Ok(..) => buf[0] as char - }; - match ch { - '%' => { - let mut bytes = [0, 0]; - match rdr.read(bytes) { - Ok(2) => {} - _ => fail!() // FIXME: malformed url? - } - let ch = uint::parse_bytes(bytes, 16u).unwrap() as u8 as char; - - if full_url { - // Only decode some characters: - match ch { - // gen-delims: - ':' | '/' | '?' | '#' | '[' | ']' | '@' | - - // sub-delims: - '!' | '$' | '&' | '"' | '(' | ')' | '*' | - '+' | ',' | ';' | '=' => { - out.push_char('%'); - out.push_char(bytes[0u] as char); - out.push_char(bytes[1u] as char); - } - - ch => out.push_char(ch) - } - } else { - out.push_char(ch); - } - } - ch => out.push_char(ch) - } - } - - out -} - -/** - * Decodes a percent-encoded string representing a URI. - * - * This will only decode escape sequences generated by `encode`. - * - * # Example - * - * ```rust - * use extra::url::decode; - * - * let url = decode(&"https://example.com/Rust%20(programming%20language)"); - * println!("{}", url); // https://example.com/Rust (programming language) - * ``` - */ -pub fn decode(s: &str) -> ~str { - decode_inner(s, true) -} - -/** - * Decode a string encoded with percent encoding. - */ -pub fn decode_component(s: &str) -> ~str { - decode_inner(s, false) -} - -fn encode_plus(s: &str) -> ~str { - let mut rdr = BufReader::new(s.as_bytes()); - let mut out = ~""; - - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) => break, - }; - match ch { - 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => { - out.push_char(ch); - } - ' ' => out.push_char('+'), - _ => out.push_str(format!("%{:X}", ch as uint)) - } - } - - out -} - -/** - * Encode a hashmap to the 'application/x-www-form-urlencoded' media type. - */ -pub fn encode_form_urlencoded(m: &HashMap<~str, ~[~str]>) -> ~str { - let mut out = ~""; - let mut first = true; - - for (key, values) in m.iter() { - let key = encode_plus(*key); - - for value in values.iter() { - if first { - first = false; - } else { - out.push_char('&'); - first = false; - } - - out.push_str(format!("{}={}", key, encode_plus(*value))); - } - } - - out -} - -/** - * Decode a string encoded with the 'application/x-www-form-urlencoded' media - * type into a hashmap. - */ -pub fn decode_form_urlencoded(s: &[u8]) -> HashMap<~str, ~[~str]> { - let mut rdr = BufReader::new(s); - let mut m = HashMap::new(); - let mut key = ~""; - let mut value = ~""; - let mut parsing_key = true; - - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) => break, - }; - match ch { - '&' | ';' => { - if key != ~"" && value != ~"" { - let mut values = match m.pop(&key) { - Some(values) => values, - None => ~[], - }; - - values.push(value); - m.insert(key, values); - } - - parsing_key = true; - key = ~""; - value = ~""; - } - '=' => parsing_key = false, - ch => { - let ch = match ch { - '%' => { - let mut bytes = [0, 0]; - match rdr.read(bytes) { - Ok(2) => {} - _ => fail!() // FIXME: malformed? - } - uint::parse_bytes(bytes, 16u).unwrap() as u8 as char - } - '+' => ' ', - ch => ch - }; - - if parsing_key { - key.push_char(ch) - } else { - value.push_char(ch) - } - } - } - } - - if key != ~"" && value != ~"" { - let mut values = match m.pop(&key) { - Some(values) => values, - None => ~[], - }; - - values.push(value); - m.insert(key, values); - } - - m -} - - -fn split_char_first(s: &str, c: char) -> (~str, ~str) { - let len = s.len(); - let mut index = len; - let mut mat = 0; - let mut rdr = BufReader::new(s.as_bytes()); - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) => break, - }; - if ch == c { - // found a match, adjust markers - index = (rdr.tell().unwrap() as uint) - 1; - mat = 1; - break; - } - } - if index+mat == len { - return (s.slice(0, index).to_owned(), ~""); - } else { - return (s.slice(0, index).to_owned(), - s.slice(index + mat, s.len()).to_owned()); - } -} - -impl fmt::Show for UserInfo { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.pass { - Some(ref pass) => write!(f.buf, "{}:{}@", self.user, *pass), - None => write!(f.buf, "{}@", self.user), - } - } -} - -fn query_from_str(rawquery: &str) -> Query { - let mut query: Query = ~[]; - if !rawquery.is_empty() { - for p in rawquery.split('&') { - let (k, v) = split_char_first(p, '='); - query.push((decode_component(k), decode_component(v))); - }; - } - return query; -} - -/** - * Converts an instance of a URI `Query` type to a string. - * - * # Example - * - * ```rust - * use extra::url; - * - * let query = ~[(~"title", ~"The Village"), (~"north", ~"52.91"), (~"west", ~"4.10")]; - * println!("{}", url::query_to_str(&query)); // title=The%20Village&north=52.91&west=4.10 - * ``` - */ -#[allow(unused_must_use)] -pub fn query_to_str(query: &Query) -> ~str { - use std::io::MemWriter; - use std::str; - - let mut writer = MemWriter::new(); - for (i, &(ref k, ref v)) in query.iter().enumerate() { - if i != 0 { write!(&mut writer, "&"); } - write!(&mut writer, "{}={}", encode_component(*k), - encode_component(*v)); - } - str::from_utf8_lossy(writer.unwrap()).into_owned() -} - -/** - * Returns a tuple of the URI scheme and the rest of the URI, or a parsing error. - * - * Does not include the separating `:` character. - * - * # Example - * - * ```rust - * use extra::url::get_scheme; - * - * let scheme = match get_scheme("https://example.com/") { - * Ok((sch, _)) => sch, - * Err(_) => ~"(None)", - * }; - * println!("Scheme in use: {}.", scheme); // Scheme in use: https. - * ``` - */ -pub fn get_scheme(rawurl: &str) -> Result<(~str, ~str), ~str> { - for (i,c) in rawurl.chars().enumerate() { - match c { - 'A' .. 'Z' | 'a' .. 'z' => continue, - '0' .. '9' | '+' | '-' | '.' => { - if i == 0 { - return Err(~"url: Scheme must begin with a letter."); - } - continue; - } - ':' => { - if i == 0 { - return Err(~"url: Scheme cannot be empty."); - } else { - return Ok((rawurl.slice(0,i).to_owned(), - rawurl.slice(i+1,rawurl.len()).to_owned())); - } - } - _ => { - return Err(~"url: Invalid character in scheme."); - } - } - }; - return Err(~"url: Scheme must be terminated with a colon."); -} - -#[deriving(Clone, Eq)] -enum Input { - Digit, // all digits - Hex, // digits and letters a-f - Unreserved // all other legal characters -} - -// returns userinfo, host, port, and unparsed part, or an error -fn get_authority(rawurl: &str) -> - Result<(Option<UserInfo>, ~str, Option<~str>, ~str), ~str> { - if !rawurl.starts_with("//") { - // there is no authority. - return Ok((None, ~"", None, rawurl.to_str())); - } - - enum State { - Start, // starting state - PassHostPort, // could be in user or port - Ip6Port, // either in ipv6 host or port - Ip6Host, // are in an ipv6 host - InHost, // are in a host - may be ipv6, but don't know yet - InPort // are in port - } - - let len = rawurl.len(); - let mut st = Start; - let mut input = Digit; // most restricted, start here. - - let mut userinfo = None; - let mut host = ~""; - let mut port = None; - - let mut colon_count = 0; - let mut pos = 0; - let mut begin = 2; - let mut end = len; - - for (i,c) in rawurl.chars().enumerate() { - if i < 2 { continue; } // ignore the leading // - - // deal with input class first - match c { - '0' .. '9' => (), - 'A' .. 'F' | 'a' .. 'f' => { - if input == Digit { - input = Hex; - } - } - 'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' | - '&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => { - input = Unreserved; - } - ':' | '@' | '?' | '#' | '/' => { - // separators, don't change anything - } - _ => { - return Err(~"Illegal character in authority"); - } - } - - // now process states - match c { - ':' => { - colon_count += 1; - match st { - Start => { - pos = i; - st = PassHostPort; - } - PassHostPort => { - // multiple colons means ipv6 address. - if input == Unreserved { - return Err( - ~"Illegal characters in IPv6 address."); - } - st = Ip6Host; - } - InHost => { - pos = i; - if input == Unreserved { - // must be port - host = rawurl.slice(begin, i).to_owned(); - st = InPort; - } else { - // can't be sure whether this is an ipv6 address or a port - st = Ip6Port; - } - } - Ip6Port => { - if input == Unreserved { - return Err(~"Illegal characters in authority."); - } - st = Ip6Host; - } - Ip6Host => { - if colon_count > 7 { - host = rawurl.slice(begin, i).to_owned(); - pos = i; - st = InPort; - } - } - _ => { - return Err(~"Invalid ':' in authority."); - } - } - input = Digit; // reset input class - } - - '@' => { - input = Digit; // reset input class - colon_count = 0; // reset count - match st { - Start => { - let user = rawurl.slice(begin, i).to_owned(); - userinfo = Some(UserInfo::new(user, None)); - st = InHost; - } - PassHostPort => { - let user = rawurl.slice(begin, pos).to_owned(); - let pass = rawurl.slice(pos+1, i).to_owned(); - userinfo = Some(UserInfo::new(user, Some(pass))); - st = InHost; - } - _ => { - return Err(~"Invalid '@' in authority."); - } - } - begin = i+1; - } - - '?' | '#' | '/' => { - end = i; - break; - } - _ => () - } - } - - // finish up - match st { - Start => { - host = rawurl.slice(begin, end).to_owned(); - } - PassHostPort | Ip6Port => { - if input != Digit { - return Err(~"Non-digit characters in port."); - } - host = rawurl.slice(begin, pos).to_owned(); - port = Some(rawurl.slice(pos+1, end).to_owned()); - } - Ip6Host | InHost => { - host = rawurl.slice(begin, end).to_owned(); - } - InPort => { - if input != Digit { - return Err(~"Non-digit characters in port."); - } - port = Some(rawurl.slice(pos+1, end).to_owned()); - } - } - - let rest = rawurl.slice(end, len).to_owned(); - return Ok((userinfo, host, port, rest)); -} - - -// returns the path and unparsed part of url, or an error -fn get_path(rawurl: &str, authority: bool) -> - Result<(~str, ~str), ~str> { - let len = rawurl.len(); - let mut end = len; - for (i,c) in rawurl.chars().enumerate() { - match c { - 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.' - | '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '=' - | '_' | '-' | '~' => { - continue; - } - '?' | '#' => { - end = i; - break; - } - _ => return Err(~"Invalid character in path.") - } - } - - if authority { - if end != 0 && !rawurl.starts_with("/") { - return Err(~"Non-empty path must begin with\ - '/' in presence of authority."); - } - } - - return Ok((decode_component(rawurl.slice(0, end)), - rawurl.slice(end, len).to_owned())); -} - -// returns the parsed query and the fragment, if present -fn get_query_fragment(rawurl: &str) -> - Result<(Query, Option<~str>), ~str> { - if !rawurl.starts_with("?") { - if rawurl.starts_with("#") { - let f = decode_component(rawurl.slice( - 1, - rawurl.len())); - return Ok((~[], Some(f))); - } else { - return Ok((~[], None)); - } - } - let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#'); - let f = if r.len() != 0 { - Some(decode_component(r)) } else { None }; - return Ok((query_from_str(q), f)); -} - -/** - * Parses a URL, converting it from a string to `Url` representation. - * - * # Arguments - * - * `rawurl` - a string representing the full URL, including scheme. - * - * # Returns - * - * A `Url` struct type representing the URL. - */ -pub fn from_str(rawurl: &str) -> Result<Url, ~str> { - // scheme - let (scheme, rest) = match get_scheme(rawurl) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // authority - let (userinfo, host, port, rest) = match get_authority(rest) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // path - let has_authority = if host == ~"" { false } else { true }; - let (path, rest) = match get_path(rest, has_authority) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // query and fragment - let (query, fragment) = match get_query_fragment(rest) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) -} - -pub fn path_from_str(rawpath: &str) -> Result<Path, ~str> { - let (path, rest) = match get_path(rawpath, false) { - Ok(val) => val, - Err(e) => return Err(e) - }; - - // query and fragment - let (query, fragment) = match get_query_fragment(rest) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - Ok(Path{ path: path, query: query, fragment: fragment }) -} - -impl FromStr for Url { - fn from_str(s: &str) -> Option<Url> { - match from_str(s) { - Ok(url) => Some(url), - Err(_) => None - } - } -} - -impl FromStr for Path { - fn from_str(s: &str) -> Option<Path> { - match path_from_str(s) { - Ok(path) => Some(path), - Err(_) => None - } - } -} - -impl fmt::Show for Url { - /** - * Converts a URL from `Url` to string representation. - * - * # Arguments - * - * `url` - a URL. - * - * # Returns - * - * A string that contains the formatted URL. Note that this will usually - * be an inverse of `from_str` but might strip out unneeded separators; - * for example, "http://somehost.com?", when parsed and formatted, will - * result in just "http://somehost.com". - */ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(write!(f.buf, "{}:", self.scheme)); - - if !self.host.is_empty() { - try!(write!(f.buf, "//")); - match self.user { - Some(ref user) => try!(write!(f.buf, "{}", *user)), - None => {} - } - match self.port { - Some(ref port) => try!(write!(f.buf, "{}:{}", self.host, - *port)), - None => try!(write!(f.buf, "{}", self.host)), - } - } - - try!(write!(f.buf, "{}", self.path)); - - if !self.query.is_empty() { - try!(write!(f.buf, "?{}", query_to_str(&self.query))); - } - - match self.fragment { - Some(ref fragment) => write!(f.buf, "\\#{}", - encode_component(*fragment)), - None => Ok(()), - } - } -} - -impl fmt::Show for Path { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(write!(f.buf, "{}", self.path)); - if !self.query.is_empty() { - try!(write!(f.buf, "?{}", self.query)) - } - - match self.fragment { - Some(ref fragment) => { - write!(f.buf, "\\#{}", encode_component(*fragment)) - } - None => Ok(()) - } - } -} - -impl<S: Writer> Hash<S> for Url { - fn hash(&self, state: &mut S) { - self.to_str().hash(state) - } -} - -impl<S: Writer> Hash<S> for Path { - fn hash(&self, state: &mut S) { - self.to_str().hash(state) - } -} - -// Put a few tests outside of the 'test' module so they can test the internal -// functions and those functions don't need 'pub' - -#[test] -fn test_split_char_first() { - let (u,v) = split_char_first("hello, sweet world", ','); - assert_eq!(u, ~"hello"); - assert_eq!(v, ~" sweet world"); - - let (u,v) = split_char_first("hello sweet world", ','); - assert_eq!(u, ~"hello sweet world"); - assert_eq!(v, ~""); -} - -#[test] -fn test_get_authority() { - let (u, h, p, r) = get_authority( - "//user:pass@rust-lang.org/something").unwrap(); - assert_eq!(u, Some(UserInfo::new(~"user", Some(~"pass")))); - assert_eq!(h, ~"rust-lang.org"); - assert!(p.is_none()); - assert_eq!(r, ~"/something"); - - let (u, h, p, r) = get_authority( - "//rust-lang.org:8000?something").unwrap(); - assert!(u.is_none()); - assert_eq!(h, ~"rust-lang.org"); - assert_eq!(p, Some(~"8000")); - assert_eq!(r, ~"?something"); - - let (u, h, p, r) = get_authority( - "//rust-lang.org#blah").unwrap(); - assert!(u.is_none()); - assert_eq!(h, ~"rust-lang.org"); - assert!(p.is_none()); - assert_eq!(r, ~"#blah"); - - // ipv6 tests - let (_, h, _, _) = get_authority( - "//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap(); - assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334"); - - let (_, h, p, _) = get_authority( - "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap(); - assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334"); - assert_eq!(p, Some(~"8000")); - - let (u, h, p, _) = get_authority( - "//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah" - ).unwrap(); - assert_eq!(u, Some(UserInfo::new(~"us", Some(~"p")))); - assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334"); - assert_eq!(p, Some(~"8000")); - - // invalid authorities; - assert!(get_authority("//user:pass@rust-lang:something").is_err()); - assert!(get_authority("//user@rust-lang:something:/path").is_err()); - assert!(get_authority( - "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err()); - assert!(get_authority( - "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err()); - - // these parse as empty, because they don't start with '//' - let (_, h, _, _) = get_authority("user:pass@rust-lang").unwrap(); - assert_eq!(h, ~""); - let (_, h, _, _) = get_authority("rust-lang.org").unwrap(); - assert_eq!(h, ~""); -} - -#[test] -fn test_get_path() { - let (p, r) = get_path("/something+%20orother", true).unwrap(); - assert_eq!(p, ~"/something+ orother"); - assert_eq!(r, ~""); - let (p, r) = get_path("test@email.com#fragment", false).unwrap(); - assert_eq!(p, ~"test@email.com"); - assert_eq!(r, ~"#fragment"); - let (p, r) = get_path("/gen/:addr=?q=v", false).unwrap(); - assert_eq!(p, ~"/gen/:addr="); - assert_eq!(r, ~"?q=v"); - - //failure cases - assert!(get_path("something?q", true).is_err()); -} - -#[cfg(test)] -mod tests { - - use super::*; - - use collections::HashMap; - - #[test] - fn test_url_parse() { - let url = ~"http://user:pass@rust-lang.org:8080/doc/~u?s=v#something"; - - let up = from_str(url); - let u = up.unwrap(); - assert_eq!(&u.scheme, &~"http"); - assert_eq!(&u.user, &Some(UserInfo::new(~"user", Some(~"pass")))); - assert_eq!(&u.host, &~"rust-lang.org"); - assert_eq!(&u.port, &Some(~"8080")); - assert_eq!(&u.path, &~"/doc/~u"); - assert_eq!(&u.query, &~[(~"s", ~"v")]); - assert_eq!(&u.fragment, &Some(~"something")); - } - - #[test] - fn test_path_parse() { - let path = ~"/doc/~u?s=v#something"; - - let up = path_from_str(path); - let u = up.unwrap(); - assert_eq!(&u.path, &~"/doc/~u"); - assert_eq!(&u.query, &~[(~"s", ~"v")]); - assert_eq!(&u.fragment, &Some(~"something")); - } - - #[test] - fn test_url_parse_host_slash() { - let urlstr = ~"http://0.42.42.42/"; - let url = from_str(urlstr).unwrap(); - assert!(url.host == ~"0.42.42.42"); - assert!(url.path == ~"/"); - } - - #[test] - fn test_path_parse_host_slash() { - let pathstr = ~"/"; - let path = path_from_str(pathstr).unwrap(); - assert!(path.path == ~"/"); - } - - #[test] - fn test_url_host_with_port() { - let urlstr = ~"scheme://host:1234"; - let url = from_str(urlstr).unwrap(); - assert_eq!(&url.scheme, &~"scheme"); - assert_eq!(&url.host, &~"host"); - assert_eq!(&url.port, &Some(~"1234")); - assert_eq!(&url.path, &~""); // is empty path really correct? Other tests think so - let urlstr = ~"scheme://host:1234/"; - let url = from_str(urlstr).unwrap(); - assert_eq!(&url.scheme, &~"scheme"); - assert_eq!(&url.host, &~"host"); - assert_eq!(&url.port, &Some(~"1234")); - assert_eq!(&url.path, &~"/"); - } - - #[test] - fn test_url_with_underscores() { - let urlstr = ~"http://dotcom.com/file_name.html"; - let url = from_str(urlstr).unwrap(); - assert!(url.path == ~"/file_name.html"); - } - - #[test] - fn test_path_with_underscores() { - let pathstr = ~"/file_name.html"; - let path = path_from_str(pathstr).unwrap(); - assert!(path.path == ~"/file_name.html"); - } - - #[test] - fn test_url_with_dashes() { - let urlstr = ~"http://dotcom.com/file-name.html"; - let url = from_str(urlstr).unwrap(); - assert!(url.path == ~"/file-name.html"); - } - - #[test] - fn test_path_with_dashes() { - let pathstr = ~"/file-name.html"; - let path = path_from_str(pathstr).unwrap(); - assert!(path.path == ~"/file-name.html"); - } - - #[test] - fn test_no_scheme() { - assert!(get_scheme("noschemehere.html").is_err()); - } - - #[test] - fn test_invalid_scheme_errors() { - assert!(from_str("99://something").is_err()); - assert!(from_str("://something").is_err()); - } - - #[test] - fn test_full_url_parse_and_format() { - let url = ~"http://user:pass@rust-lang.org/doc?s=v#something"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_userless_url_parse_and_format() { - let url = ~"http://rust-lang.org/doc?s=v#something"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_queryless_url_parse_and_format() { - let url = ~"http://user:pass@rust-lang.org/doc#something"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_empty_query_url_parse_and_format() { - let url = ~"http://user:pass@rust-lang.org/doc?#something"; - let should_be = ~"http://user:pass@rust-lang.org/doc#something"; - assert_eq!(from_str(url).unwrap().to_str(), should_be); - } - - #[test] - fn test_fragmentless_url_parse_and_format() { - let url = ~"http://user:pass@rust-lang.org/doc?q=v"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_minimal_url_parse_and_format() { - let url = ~"http://rust-lang.org/doc"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_url_with_port_parse_and_format() { - let url = ~"http://rust-lang.org:80/doc"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_scheme_host_only_url_parse_and_format() { - let url = ~"http://rust-lang.org"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_pathless_url_parse_and_format() { - let url = ~"http://user:pass@rust-lang.org?q=v#something"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_scheme_host_fragment_only_url_parse_and_format() { - let url = ~"http://rust-lang.org#something"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_url_component_encoding() { - let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B"; - let u = from_str(url).unwrap(); - assert!(u.path == ~"/doc uments"); - assert!(u.query == ~[(~"ba%d ", ~"#&+")]); - } - - #[test] - fn test_path_component_encoding() { - let path = ~"/doc%20uments?ba%25d%20=%23%26%2B"; - let p = path_from_str(path).unwrap(); - assert!(p.path == ~"/doc uments"); - assert!(p.query == ~[(~"ba%d ", ~"#&+")]); - } - - #[test] - fn test_url_without_authority() { - let url = ~"mailto:test@email.com"; - assert_eq!(from_str(url).unwrap().to_str(), url); - } - - #[test] - fn test_encode() { - assert_eq!(encode(""), ~""); - assert_eq!(encode("http://example.com"), ~"http://example.com"); - assert_eq!(encode("foo bar% baz"), ~"foo%20bar%25%20baz"); - assert_eq!(encode(" "), ~"%20"); - assert_eq!(encode("!"), ~"!"); - assert_eq!(encode("\""), ~"\""); - assert_eq!(encode("#"), ~"#"); - assert_eq!(encode("$"), ~"$"); - assert_eq!(encode("%"), ~"%25"); - assert_eq!(encode("&"), ~"&"); - assert_eq!(encode("'"), ~"%27"); - assert_eq!(encode("("), ~"("); - assert_eq!(encode(")"), ~")"); - assert_eq!(encode("*"), ~"*"); - assert_eq!(encode("+"), ~"+"); - assert_eq!(encode(","), ~","); - assert_eq!(encode("/"), ~"/"); - assert_eq!(encode(":"), ~":"); - assert_eq!(encode(";"), ~";"); - assert_eq!(encode("="), ~"="); - assert_eq!(encode("?"), ~"?"); - assert_eq!(encode("@"), ~"@"); - assert_eq!(encode("["), ~"["); - assert_eq!(encode("]"), ~"]"); - } - - #[test] - fn test_encode_component() { - assert_eq!(encode_component(""), ~""); - assert!(encode_component("http://example.com") == - ~"http%3A%2F%2Fexample.com"); - assert!(encode_component("foo bar% baz") == - ~"foo%20bar%25%20baz"); - assert_eq!(encode_component(" "), ~"%20"); - assert_eq!(encode_component("!"), ~"%21"); - assert_eq!(encode_component("#"), ~"%23"); - assert_eq!(encode_component("$"), ~"%24"); - assert_eq!(encode_component("%"), ~"%25"); - assert_eq!(encode_component("&"), ~"%26"); - assert_eq!(encode_component("'"), ~"%27"); - assert_eq!(encode_component("("), ~"%28"); - assert_eq!(encode_component(")"), ~"%29"); - assert_eq!(encode_component("*"), ~"%2A"); - assert_eq!(encode_component("+"), ~"%2B"); - assert_eq!(encode_component(","), ~"%2C"); - assert_eq!(encode_component("/"), ~"%2F"); - assert_eq!(encode_component(":"), ~"%3A"); - assert_eq!(encode_component(";"), ~"%3B"); - assert_eq!(encode_component("="), ~"%3D"); - assert_eq!(encode_component("?"), ~"%3F"); - assert_eq!(encode_component("@"), ~"%40"); - assert_eq!(encode_component("["), ~"%5B"); - assert_eq!(encode_component("]"), ~"%5D"); - } - - #[test] - fn test_decode() { - assert_eq!(decode(""), ~""); - assert_eq!(decode("abc/def 123"), ~"abc/def 123"); - assert_eq!(decode("abc%2Fdef%20123"), ~"abc%2Fdef 123"); - assert_eq!(decode("%20"), ~" "); - assert_eq!(decode("%21"), ~"%21"); - assert_eq!(decode("%22"), ~"%22"); - assert_eq!(decode("%23"), ~"%23"); - assert_eq!(decode("%24"), ~"%24"); - assert_eq!(decode("%25"), ~"%"); - assert_eq!(decode("%26"), ~"%26"); - assert_eq!(decode("%27"), ~"'"); - assert_eq!(decode("%28"), ~"%28"); - assert_eq!(decode("%29"), ~"%29"); - assert_eq!(decode("%2A"), ~"%2A"); - assert_eq!(decode("%2B"), ~"%2B"); - assert_eq!(decode("%2C"), ~"%2C"); - assert_eq!(decode("%2F"), ~"%2F"); - assert_eq!(decode("%3A"), ~"%3A"); - assert_eq!(decode("%3B"), ~"%3B"); - assert_eq!(decode("%3D"), ~"%3D"); - assert_eq!(decode("%3F"), ~"%3F"); - assert_eq!(decode("%40"), ~"%40"); - assert_eq!(decode("%5B"), ~"%5B"); - assert_eq!(decode("%5D"), ~"%5D"); - } - - #[test] - fn test_decode_component() { - assert_eq!(decode_component(""), ~""); - assert_eq!(decode_component("abc/def 123"), ~"abc/def 123"); - assert_eq!(decode_component("abc%2Fdef%20123"), ~"abc/def 123"); - assert_eq!(decode_component("%20"), ~" "); - assert_eq!(decode_component("%21"), ~"!"); - assert_eq!(decode_component("%22"), ~"\""); - assert_eq!(decode_component("%23"), ~"#"); - assert_eq!(decode_component("%24"), ~"$"); - assert_eq!(decode_component("%25"), ~"%"); - assert_eq!(decode_component("%26"), ~"&"); - assert_eq!(decode_component("%27"), ~"'"); - assert_eq!(decode_component("%28"), ~"("); - assert_eq!(decode_component("%29"), ~")"); - assert_eq!(decode_component("%2A"), ~"*"); - assert_eq!(decode_component("%2B"), ~"+"); - assert_eq!(decode_component("%2C"), ~","); - assert_eq!(decode_component("%2F"), ~"/"); - assert_eq!(decode_component("%3A"), ~":"); - assert_eq!(decode_component("%3B"), ~";"); - assert_eq!(decode_component("%3D"), ~"="); - assert_eq!(decode_component("%3F"), ~"?"); - assert_eq!(decode_component("%40"), ~"@"); - assert_eq!(decode_component("%5B"), ~"["); - assert_eq!(decode_component("%5D"), ~"]"); - } - - #[test] - fn test_encode_form_urlencoded() { - let mut m = HashMap::new(); - assert_eq!(encode_form_urlencoded(&m), ~""); - - m.insert(~"", ~[]); - m.insert(~"foo", ~[]); - assert_eq!(encode_form_urlencoded(&m), ~""); - - let mut m = HashMap::new(); - m.insert(~"foo", ~[~"bar", ~"123"]); - assert_eq!(encode_form_urlencoded(&m), ~"foo=bar&foo=123"); - - let mut m = HashMap::new(); - m.insert(~"foo bar", ~[~"abc", ~"12 = 34"]); - assert!(encode_form_urlencoded(&m) == - ~"foo+bar=abc&foo+bar=12+%3D+34"); - } - - #[test] - fn test_decode_form_urlencoded() { - assert_eq!(decode_form_urlencoded([]).len(), 0); - - let s = "a=1&foo+bar=abc&foo+bar=12+%3D+34".as_bytes(); - let form = decode_form_urlencoded(s); - assert_eq!(form.len(), 2); - assert_eq!(form.get(&~"a"), &~[~"1"]); - assert_eq!(form.get(&~"foo bar"), &~[~"abc", ~"12 = 34"]); - } -} diff --git a/src/libextra/workcache.rs b/src/libextra/workcache.rs deleted file mode 100644 index 274258daf16..00000000000 --- a/src/libextra/workcache.rs +++ /dev/null @@ -1,516 +0,0 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#[allow(missing_doc)]; -#[allow(visible_private_types)]; - -use serialize::json; -use serialize::json::ToJson; -use serialize::{Encoder, Encodable, Decoder, Decodable}; -use sync::{Arc,RWArc}; -use collections::TreeMap; -use std::str; -use std::io; -use std::io::{File, MemWriter}; - -/** -* -* This is a loose clone of the [fbuild build system](https://github.com/felix-lang/fbuild), -* made a touch more generic (not wired to special cases on files) and much -* less metaprogram-y due to rust's comparative weakness there, relative to -* python. -* -* It's based around _imperative builds_ that happen to have some function -* calls cached. That is, it's _just_ a mechanism for describing cached -* functions. This makes it much simpler and smaller than a "build system" -* that produces an IR and evaluates it. The evaluation order is normal -* function calls. Some of them just return really quickly. -* -* A cached function consumes and produces a set of _works_. A work has a -* name, a kind (that determines how the value is to be checked for -* freshness) and a value. Works must also be (de)serializable. Some -* examples of works: -* -* kind name value -* ------------------------ -* cfg os linux -* file foo.c <sha1> -* url foo.com <etag> -* -* Works are conceptually single units, but we store them most of the time -* in maps of the form (type,name) => value. These are WorkMaps. -* -* A cached function divides the works it's interested in into inputs and -* outputs, and subdivides those into declared (input) works and -* discovered (input and output) works. -* -* A _declared_ input or is one that is given to the workcache before -* any work actually happens, in the "prep" phase. Even when a function's -* work-doing part (the "exec" phase) never gets called, it has declared -* inputs, which can be checked for freshness (and potentially -* used to determine that the function can be skipped). -* -* The workcache checks _all_ works for freshness, but uses the set of -* discovered outputs from the _previous_ exec (which it will re-discover -* and re-record each time the exec phase runs). -* -* Therefore the discovered works cached in the db might be a -* mis-approximation of the current discoverable works, but this is ok for -* the following reason: we assume that if an artifact A changed from -* depending on B,C,D to depending on B,C,D,E, then A itself changed (as -* part of the change-in-dependencies), so we will be ok. -* -* Each function has a single discriminated output work called its _result_. -* This is only different from other works in that it is returned, by value, -* from a call to the cacheable function; the other output works are used in -* passing to invalidate dependencies elsewhere in the cache, but do not -* otherwise escape from a function invocation. Most functions only have one -* output work anyways. -* -* A database (the central store of a workcache) stores a mappings: -* -* (fn_name,{declared_input}) => ({discovered_input}, -* {discovered_output},result) -* -* (Note: fbuild, which workcache is based on, has the concept of a declared -* output as separate from a discovered output. This distinction exists only -* as an artifact of how fbuild works: via annotations on function types -* and metaprogramming, with explicit dependency declaration as a fallback. -* Workcache is more explicit about dependencies, and as such treats all -* outputs the same, as discovered-during-the-last-run.) -* -*/ - -#[deriving(Clone, Eq, Encodable, Decodable, Ord, TotalOrd, TotalEq)] -struct WorkKey { - kind: ~str, - name: ~str -} - -impl WorkKey { - pub fn new(kind: &str, name: &str) -> WorkKey { - WorkKey { - kind: kind.to_owned(), - name: name.to_owned(), - } - } -} - -// FIXME #8883: The key should be a WorkKey and not a ~str. -// This is working around some JSON weirdness. -#[deriving(Clone, Eq, Encodable, Decodable)] -struct WorkMap(TreeMap<~str, KindMap>); - -#[deriving(Clone, Eq, Encodable, Decodable)] -struct KindMap(TreeMap<~str, ~str>); - -impl WorkMap { - fn new() -> WorkMap { WorkMap(TreeMap::new()) } - - fn insert_work_key(&mut self, k: WorkKey, val: ~str) { - let WorkKey { kind, name } = k; - let WorkMap(ref mut map) = *self; - match map.find_mut(&name) { - Some(&KindMap(ref mut m)) => { m.insert(kind, val); return; } - None => () - } - let mut new_map = TreeMap::new(); - new_map.insert(kind, val); - map.insert(name, KindMap(new_map)); - } -} - -pub struct Database { - priv db_filename: Path, - priv db_cache: TreeMap<~str, ~str>, - db_dirty: bool -} - -impl Database { - - pub fn new(p: Path) -> Database { - let mut rslt = Database { - db_filename: p, - db_cache: TreeMap::new(), - db_dirty: false - }; - if rslt.db_filename.exists() { - rslt.load(); - } - rslt - } - - pub fn prepare(&self, - fn_name: &str, - declared_inputs: &WorkMap) - -> Option<(WorkMap, WorkMap, ~str)> { - let k = json_encode(&(fn_name, declared_inputs)); - match self.db_cache.find(&k) { - None => None, - Some(v) => Some(json_decode(*v)) - } - } - - pub fn cache(&mut self, - fn_name: &str, - declared_inputs: &WorkMap, - discovered_inputs: &WorkMap, - discovered_outputs: &WorkMap, - result: &str) { - let k = json_encode(&(fn_name, declared_inputs)); - let v = json_encode(&(discovered_inputs, - discovered_outputs, - result)); - self.db_cache.insert(k,v); - self.db_dirty = true - } - - // FIXME #4330: This should have &mut self and should set self.db_dirty to false. - fn save(&self) -> io::IoResult<()> { - let mut f = File::create(&self.db_filename); - self.db_cache.to_json().to_pretty_writer(&mut f) - } - - fn load(&mut self) { - assert!(!self.db_dirty); - assert!(self.db_filename.exists()); - match File::open(&self.db_filename) { - Err(e) => fail!("Couldn't load workcache database {}: {}", - self.db_filename.display(), - e), - Ok(mut stream) => { - match json::from_reader(&mut stream) { - Err(e) => fail!("Couldn't parse workcache database (from file {}): {}", - self.db_filename.display(), e.to_str()), - Ok(r) => { - let mut decoder = json::Decoder::new(r); - self.db_cache = Decodable::decode(&mut decoder); - } - } - } - } - } -} - -#[unsafe_destructor] -impl Drop for Database { - fn drop(&mut self) { - if self.db_dirty { - // FIXME: is failing the right thing to do here - self.save().unwrap(); - } - } -} - -pub type FreshnessMap = TreeMap<~str,extern fn(&str,&str)->bool>; - -#[deriving(Clone)] -pub struct Context { - db: RWArc<Database>, - priv cfg: Arc<json::Object>, - /// Map from kinds (source, exe, url, etc.) to a freshness function. - /// The freshness function takes a name (e.g. file path) and value - /// (e.g. hash of file contents) and determines whether it's up-to-date. - /// For example, in the file case, this would read the file off disk, - /// hash it, and return the result of comparing the given hash and the - /// read hash for equality. - priv freshness: Arc<FreshnessMap> -} - -pub struct Prep<'a> { - priv ctxt: &'a Context, - priv fn_name: &'a str, - priv declared_inputs: WorkMap, -} - -pub struct Exec { - priv discovered_inputs: WorkMap, - priv discovered_outputs: WorkMap -} - -enum Work<'a, T> { - WorkValue(T), - WorkFromTask(&'a Prep<'a>, Receiver<(Exec, T)>), -} - -fn json_encode<'a, T:Encodable<json::Encoder<'a>>>(t: &T) -> ~str { - let mut writer = MemWriter::new(); - let mut encoder = json::Encoder::new(&mut writer as &mut io::Writer); - t.encode(&mut encoder); - str::from_utf8_owned(writer.unwrap()).unwrap() -} - -// FIXME(#5121) -fn json_decode<T:Decodable<json::Decoder>>(s: &str) -> T { - debug!("json decoding: {}", s); - let j = json::from_str(s).unwrap(); - let mut decoder = json::Decoder::new(j); - Decodable::decode(&mut decoder) -} - -impl Context { - - pub fn new(db: RWArc<Database>, - cfg: Arc<json::Object>) -> Context { - Context::new_with_freshness(db, cfg, Arc::new(TreeMap::new())) - } - - pub fn new_with_freshness(db: RWArc<Database>, - cfg: Arc<json::Object>, - freshness: Arc<FreshnessMap>) -> Context { - Context { - db: db, - cfg: cfg, - freshness: freshness - } - } - - pub fn prep<'a>(&'a self, fn_name: &'a str) -> Prep<'a> { - Prep::new(self, fn_name) - } - - pub fn with_prep<'a, - T>( - &'a self, - fn_name: &'a str, - blk: |p: &mut Prep| -> T) - -> T { - let mut p = self.prep(fn_name); - blk(&mut p) - } - -} - -impl Exec { - pub fn discover_input(&mut self, - dependency_kind: &str, - dependency_name: &str, - dependency_val: &str) { - debug!("Discovering input {} {} {}", dependency_kind, dependency_name, dependency_val); - self.discovered_inputs.insert_work_key(WorkKey::new(dependency_kind, dependency_name), - dependency_val.to_owned()); - } - pub fn discover_output(&mut self, - dependency_kind: &str, - dependency_name: &str, - dependency_val: &str) { - debug!("Discovering output {} {} {}", dependency_kind, dependency_name, dependency_val); - self.discovered_outputs.insert_work_key(WorkKey::new(dependency_kind, dependency_name), - dependency_val.to_owned()); - } - - // returns pairs of (kind, name) - pub fn lookup_discovered_inputs(&self) -> ~[(~str, ~str)] { - let mut rs = ~[]; - let WorkMap(ref discovered_inputs) = self.discovered_inputs; - for (k, v) in discovered_inputs.iter() { - let KindMap(ref vmap) = *v; - for (k1, _) in vmap.iter() { - rs.push((k1.clone(), k.clone())); - } - } - rs - } -} - -impl<'a> Prep<'a> { - fn new(ctxt: &'a Context, fn_name: &'a str) -> Prep<'a> { - Prep { - ctxt: ctxt, - fn_name: fn_name, - declared_inputs: WorkMap::new() - } - } - - pub fn lookup_declared_inputs(&self) -> ~[~str] { - let mut rs = ~[]; - let WorkMap(ref declared_inputs) = self.declared_inputs; - for (_, v) in declared_inputs.iter() { - let KindMap(ref vmap) = *v; - for (inp, _) in vmap.iter() { - rs.push(inp.clone()); - } - } - rs - } -} - -impl<'a> Prep<'a> { - pub fn declare_input(&mut self, kind: &str, name: &str, val: &str) { - debug!("Declaring input {} {} {}", kind, name, val); - self.declared_inputs.insert_work_key(WorkKey::new(kind, name), - val.to_owned()); - } - - fn is_fresh(&self, cat: &str, kind: &str, - name: &str, val: &str) -> bool { - let k = kind.to_owned(); - let f = self.ctxt.freshness.get().find(&k); - debug!("freshness for: {}/{}/{}/{}", cat, kind, name, val) - let fresh = match f { - None => fail!("missing freshness-function for '{}'", kind), - Some(f) => (*f)(name, val) - }; - if fresh { - info!("{} {}:{} is fresh", cat, kind, name); - } else { - info!("{} {}:{} is not fresh", cat, kind, name); - } - fresh - } - - fn all_fresh(&self, cat: &str, map: &WorkMap) -> bool { - let WorkMap(ref map) = *map; - for (k_name, kindmap) in map.iter() { - let KindMap(ref kindmap_) = *kindmap; - for (k_kind, v) in kindmap_.iter() { - if ! self.is_fresh(cat, *k_kind, *k_name, *v) { - return false; - } - } - } - return true; - } - - pub fn exec<'a, T:Send + - Encodable<json::Encoder<'a>> + - Decodable<json::Decoder>>( - &'a self, blk: proc(&mut Exec) -> T) -> T { - self.exec_work(blk).unwrap() - } - - fn exec_work<'a, T:Send + - Encodable<json::Encoder<'a>> + - Decodable<json::Decoder>>( // FIXME(#5121) - &'a self, blk: proc(&mut Exec) -> T) -> Work<'a, T> { - let mut bo = Some(blk); - - debug!("exec_work: looking up {} and {:?}", self.fn_name, - self.declared_inputs); - let cached = self.ctxt.db.read(|db| { - db.prepare(self.fn_name, &self.declared_inputs) - }); - - match cached { - Some((ref disc_in, ref disc_out, ref res)) - if self.all_fresh("declared input",&self.declared_inputs) && - self.all_fresh("discovered input", disc_in) && - self.all_fresh("discovered output", disc_out) => { - debug!("Cache hit!"); - debug!("Trying to decode: {:?} / {:?} / {}", - disc_in, disc_out, *res); - Work::from_value(json_decode(*res)) - } - - _ => { - debug!("Cache miss!"); - let (tx, rx) = channel(); - let blk = bo.take_unwrap(); - - // FIXME: What happens if the task fails? - spawn(proc() { - let mut exe = Exec { - discovered_inputs: WorkMap::new(), - discovered_outputs: WorkMap::new(), - }; - let v = blk(&mut exe); - tx.send((exe, v)); - }); - Work::from_task(self, rx) - } - } - } -} - -impl<'a, T:Send + - Encodable<json::Encoder<'a>> + - Decodable<json::Decoder>> - Work<'a, T> { // FIXME(#5121) - - pub fn from_value(elt: T) -> Work<'a, T> { - WorkValue(elt) - } - pub fn from_task(prep: &'a Prep<'a>, port: Receiver<(Exec, T)>) - -> Work<'a, T> { - WorkFromTask(prep, port) - } - - pub fn unwrap(self) -> T { - match self { - WorkValue(v) => v, - WorkFromTask(prep, port) => { - let (exe, v) = port.recv(); - let s = json_encode(&v); - prep.ctxt.db.write(|db| { - db.cache(prep.fn_name, - &prep.declared_inputs, - &exe.discovered_inputs, - &exe.discovered_outputs, - s) - }); - v - } - } - } -} - - -#[test] -#[cfg(not(target_os="android"))] // FIXME(#10455) -fn test() { - use std::os; - use std::io::{fs, Process}; - use std::str::from_utf8_owned; - - // Create a path to a new file 'filename' in the directory in which - // this test is running. - fn make_path(filename: ~str) -> Path { - let pth = os::self_exe_path().expect("workcache::test failed").with_filename(filename); - if pth.exists() { - fs::unlink(&pth).unwrap(); - } - return pth; - } - - let pth = make_path(~"foo.c"); - File::create(&pth).write(bytes!("int main() { return 0; }")).unwrap(); - - let db_path = make_path(~"db.json"); - - let cx = Context::new(RWArc::new(Database::new(db_path)), - Arc::new(TreeMap::new())); - - let s = cx.with_prep("test1", |prep| { - - let subcx = cx.clone(); - let pth = pth.clone(); - - let contents = File::open(&pth).read_to_end().unwrap(); - let file_content = from_utf8_owned(contents).unwrap(); - - // FIXME (#9639): This needs to handle non-utf8 paths - prep.declare_input("file", pth.as_str().unwrap(), file_content); - prep.exec(proc(_exe) { - let out = make_path(~"foo.o"); - // FIXME (#9639): This needs to handle non-utf8 paths - Process::status("gcc", [pth.as_str().unwrap().to_owned(), - ~"-o", - out.as_str().unwrap().to_owned()]).unwrap(); - - let _proof_of_concept = subcx.prep("subfn"); - // Could run sub-rules inside here. - - // FIXME (#9639): This needs to handle non-utf8 paths - out.as_str().unwrap().to_owned() - }) - }); - - println!("{}", s); -} |
