diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-09 23:10:50 +1000 |
|---|---|---|
| committer | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-10 23:02:54 +1000 |
| commit | 1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46 (patch) | |
| tree | 54f03a318e14bcdbdb56e01b3c80d00a9db87a17 /src/libstd | |
| parent | 2ff6b298c5f23f48aa993fced41b6e29e446b7ce (diff) | |
| download | rust-1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46.tar.gz rust-1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46.zip | |
std: replace str::each_split* with an iterator
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/os.rs | 6 | ||||
| -rw-r--r-- | src/libstd/path.rs | 60 | ||||
| -rw-r--r-- | src/libstd/str.rs | 586 |
3 files changed, 215 insertions, 437 deletions
diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 6ce6d81bf24..2069e61f11e 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -30,6 +30,7 @@ use cast; use io; +use iterator::IteratorUtil; use libc; use libc::{c_char, c_void, c_int, size_t}; use libc::{mode_t, FILE}; @@ -224,12 +225,11 @@ pub fn env() -> ~[(~str,~str)] { fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] { let mut pairs = ~[]; for input.each |p| { - let mut vs = ~[]; - for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) } + let vs: ~[&str] = p.splitn_iter('=', 1).collect(); debug!("splitting: len: %u", vs.len()); assert_eq!(vs.len(), 2); - pairs.push((copy vs[0], copy vs[1])); + pairs.push((vs[0].to_owned(), vs[1].to_owned())); } pairs } diff --git a/src/libstd/path.rs b/src/libstd/path.rs index a551b9bf3c0..b2f25d41157 100644 --- a/src/libstd/path.rs +++ b/src/libstd/path.rs @@ -18,6 +18,7 @@ Cross-platform file path handling use container::Container; use cmp::Eq; +use iterator::IteratorUtil; use libc; use option::{None, Option, Some}; use str; @@ -449,10 +450,9 @@ impl ToStr for PosixPath { // PosixPath and WindowsPath, most of their methods are common. impl GenericPath for PosixPath { fn from_str(s: &str) -> PosixPath { - let mut components = ~[]; - for str::each_split_nonempty(s, |c| c == '/') |s| { - components.push(s.to_owned()) - } + let components = s.split_iter('/') + .filter_map(|s| if s.is_empty() {None} else {Some(s.to_owned())}) + .collect(); let is_absolute = (s.len() != 0 && s[0] == '/' as u8); PosixPath { is_absolute: is_absolute, @@ -508,7 +508,7 @@ impl GenericPath for PosixPath { } fn with_filename(&self, f: &str) -> PosixPath { - assert!(! str::any(f, |c| windows::is_sep(c as u8))); + assert!(! str::any(f, |c| windows::is_sep(c))); self.dir_path().push(f) } @@ -569,11 +569,11 @@ impl GenericPath for PosixPath { fn push_many(&self, cs: &[~str]) -> PosixPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = ~[]; - for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for e.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); } PosixPath { is_absolute: self.is_absolute, @@ -583,11 +583,11 @@ impl GenericPath for PosixPath { fn push(&self, s: &str) -> PosixPath { let mut v = copy self.components; - let mut ss = ~[]; - for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for s.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); PosixPath { components: v, ..copy *self } } @@ -661,11 +661,11 @@ impl GenericPath for WindowsPath { } } - let mut components = ~[]; - for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| { - components.push(s.to_owned()) - } - let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0])); + let components = rest.split_iter(windows::is_sep) + .filter_map(|s| if s.is_empty() {None} else {Some(s.to_owned())}) + .collect(); + + let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0] as char)); WindowsPath { host: host, device: device, @@ -722,7 +722,7 @@ impl GenericPath for WindowsPath { } fn with_filename(&self, f: &str) -> WindowsPath { - assert!(! str::any(f, |c| windows::is_sep(c as u8))); + assert!(! str::any(f, |c| windows::is_sep(c))); self.dir_path().push(f) } @@ -826,11 +826,11 @@ impl GenericPath for WindowsPath { fn push_many(&self, cs: &[~str]) -> WindowsPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = ~[]; - for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for e.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); } // tedious, but as-is, we can't use ..self WindowsPath { @@ -843,11 +843,11 @@ impl GenericPath for WindowsPath { fn push(&self, s: &str) -> WindowsPath { let mut v = copy self.components; - let mut ss = ~[]; - for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for s.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); WindowsPath { components: v, ..copy *self } } @@ -905,8 +905,8 @@ pub mod windows { use option::{None, Option, Some}; #[inline(always)] - pub fn is_sep(u: u8) -> bool { - u == '/' as u8 || u == '\\' as u8 + pub fn is_sep(u: char) -> bool { + u == '/' || u == '\\' } pub fn extract_unc_prefix(s: &str) -> Option<(~str,~str)> { @@ -915,7 +915,7 @@ pub mod windows { s[0] == s[1]) { let mut i = 2; while i < s.len() { - if is_sep(s[i]) { + if is_sep(s[i] as char) { let pre = s.slice(2, i).to_owned(); let rest = s.slice(i, s.len()).to_owned(); return Some((pre, rest)); diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 2af300fc1b8..930026fa4f7 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -25,7 +25,7 @@ use clone::Clone; use cmp::{TotalOrd, Ordering, Less, Equal, Greater}; use container::Container; use iter::Times; -use iterator::{Iterator, IteratorUtil}; +use iterator::{Iterator, IteratorUtil, FilterIterator}; use libc; use option::{None, Option, Some}; use old_iter::{BaseIter, EqIter}; @@ -633,128 +633,92 @@ pub fn slice<'a>(s: &'a str, begin: uint, end: uint) -> &'a str { unsafe { raw::slice_bytes(s, begin, end) } } -/// Splits a string into substrings at each occurrence of a given character -pub fn each_split_char<'a>(s: &'a str, sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), true, true, it) -} +/// An iterator over the substrings of a string, separated by `sep`. +pub struct StrCharSplitIterator<'self,Sep> { + priv string: &'self str, + priv position: uint, + priv sep: Sep, + /// The number of splits remaining + priv count: uint, + /// Whether an empty string at the end is allowed + priv allow_trailing_empty: bool, + priv finished: bool, + priv only_ascii: bool +} + +/// An iterator over the words of a string, separated by an sequence of whitespace +pub type WordIterator<'self> = + FilterIterator<'self, &'self str, + StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>; + +/// A separator for splitting a string character-wise +pub trait StrCharSplitSeparator { + /// Determine if the splitter should split at the given character + fn should_split(&self, char) -> bool; + /// Indicate if the splitter only uses ASCII characters, which + /// allows for a faster implementation. + fn only_ascii(&self) -> bool; +} +impl StrCharSplitSeparator for char { + #[inline(always)] + fn should_split(&self, c: char) -> bool { *self == c } -/// Like `each_split_char`, but a trailing empty string is omitted -pub fn each_split_char_no_trailing<'a>(s: &'a str, - sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), true, false, it) + fn only_ascii(&self) -> bool { (*self as uint) < 128 } } +impl<'self> StrCharSplitSeparator for &'self fn(char) -> bool { + #[inline(always)] + fn should_split(&self, c: char) -> bool { (*self)(c) } -/** - * Splits a string into substrings at each occurrence of a given - * character up to 'count' times. - * - * The character must be a valid UTF-8/ASCII character - */ -pub fn each_splitn_char<'a>(s: &'a str, - sep: char, - count: uint, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, count, true, true, it) + fn only_ascii(&self) -> bool { false } } +impl<'self> StrCharSplitSeparator for extern "Rust" fn(char) -> bool { + #[inline(always)] + fn should_split(&self, c: char) -> bool { (*self)(c) } -/// Like `each_split_char`, but omits empty strings -pub fn each_split_char_nonempty<'a>(s: &'a str, - sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), false, false, it) + fn only_ascii(&self) -> bool { false } } -fn each_split_char_inner<'a>(s: &'a str, - sep: char, - count: uint, - allow_empty: bool, - allow_trailing_empty: bool, - it: &fn(&'a str) -> bool) -> bool { - if sep < 128u as char { - let (b, l) = (sep as u8, len(s)); - let mut done = 0u; - let mut (i, start) = (0u, 0u); - while i < l && done < count { - if s[i] == b { - if allow_empty || start < i { - if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { - return false; - } - } - start = i + 1u; - done += 1u; - } - i += 1u; - } - // only slice a non-empty trailing substring - if allow_trailing_empty || start < l { - if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; } - } - return true; - } - return each_split_inner(s, |cur| cur == sep, count, - allow_empty, allow_trailing_empty, it) -} +impl<'self, Sep: StrCharSplitSeparator> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> { + fn next(&mut self) -> Option<&'self str> { + if self.finished { return None } -/// Splits a string into substrings using a character function -pub fn each_split<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), true, true, it) -} + let l = self.string.len(); + let start = self.position; -/// Like `each_split`, but a trailing empty string is omitted -pub fn each_split_no_trailing<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), true, false, it) -} + if self.only_ascii { + // this gives a *huge* speed up for splitting on ASCII + // characters (e.g. '\n' or ' ') + while self.position < l && self.count > 0 { + let byte = self.string[self.position]; -/** - * Splits a string into substrings using a character function, cutting at - * most `count` times. - */ -pub fn each_splitn<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - count: uint, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, count, true, true, it) -} - -/// Like `each_split`, but omits empty strings -pub fn each_split_nonempty<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), false, false, it) -} - -fn each_split_inner<'a>(s: &'a str, - sepfn: &fn(cc: char) -> bool, - count: uint, - allow_empty: bool, - allow_trailing_empty: bool, - it: &fn(&'a str) -> bool) -> bool { - let l = len(s); - let mut (i, start, done) = (0u, 0u, 0u); - while i < l && done < count { - let CharRange {ch, next} = char_range_at(s, i); - if sepfn(ch) { - if allow_empty || start < i { - if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { - return false; + if self.sep.should_split(byte as char) { + let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; + self.position += 1; + self.count -= 1; + return Some(slice); } + self.position += 1; + } + } else { + while self.position < l && self.count > 0 { + let CharRange {ch, next} = char_range_at(self.string, self.position); + + if self.sep.should_split(ch) { + let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; + self.position = next; + self.count -= 1; + return Some(slice); + } + self.position = next; } - start = next; - done += 1u; } - i = next; - } - if allow_trailing_empty || start < l { - if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; } + self.finished = true; + if self.allow_trailing_empty || start < l { + Some(unsafe { raw::slice_bytes(self.string, start, l) }) + } else { + None + } } - return true; } // See Issue #1932 for why this is a naive search @@ -876,18 +840,11 @@ pub fn levdistance(s: &str, t: &str) -> uint { } /** - * Splits a string into substrings separated by LF ('\n'). - */ -pub fn each_line<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - each_split_char_no_trailing(s, '\n', it) -} - -/** * Splits a string into substrings separated by LF ('\n') * and/or CR LF ("\r\n") */ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - for each_line(s) |s| { + for s.line_iter().advance |s| { let l = s.len(); if l > 0u && s[l - 1u] == '\r' as u8 { if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return false; } @@ -898,11 +855,6 @@ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { return true; } -/// Splits a string into substrings separated by whitespace -pub fn each_word<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - each_split_nonempty(s, char::is_whitespace, it) -} - /** Splits a string into substrings with possibly internal whitespace, * each of them at most `lim` bytes long. The substrings have leading and trailing * whitespace removed, and are only cut at whitespace boundaries. @@ -2216,7 +2168,7 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T { * ~~~ {.rust} * let string = "a\nb\nc"; * let mut lines = ~[]; - * for each_line(string) |line| { lines.push(line) } + * for string.line_iter().advance |line| { lines.push(line) } * * assert!(subslice_offset(string, lines[0]) == 0); // &"a" * assert!(subslice_offset(string, lines[1]) == 2); // &"b" @@ -2523,6 +2475,18 @@ pub trait StrSlice<'self> { fn rev_iter(&self) -> StrCharRevIterator<'self>; fn bytes_iter(&self) -> StrBytesIterator<'self>; fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>; + fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>; + fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint) + -> StrCharSplitIterator<'self, Sep>; + fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, + count: uint, allow_trailing_empty: bool) + -> StrCharSplitIterator<'self, Sep>; + /// An iterator over the lines of a string (subsequences separated + /// by `\n`). + fn line_iter(&self) -> StrCharSplitIterator<'self, char>; + /// An iterator over the words of a string (subsequences separated + /// by any sequence of whitespace). + fn word_iter(&self) -> WordIterator<'self>; fn ends_with(&self, needle: &str) -> bool; fn is_empty(&self) -> bool; fn is_whitespace(&self) -> bool; @@ -2530,8 +2494,6 @@ pub trait StrSlice<'self> { fn len(&self) -> uint; fn char_len(&self) -> uint; fn slice(&self, begin: uint, end: uint) -> &'self str; - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool; - fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool; fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool; fn starts_with<'a>(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; @@ -2597,6 +2559,36 @@ impl<'self> StrSlice<'self> for &'self str { StrBytesRevIterator { it: as_bytes_slice(*self).rev_iter() } } + fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> { + self.split_options_iter(sep, self.len(), true) + } + + fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint) + -> StrCharSplitIterator<'self, Sep> { + self.split_options_iter(sep, count, true) + } + fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, + count: uint, allow_trailing_empty: bool) + -> StrCharSplitIterator<'self, Sep> { + let only_ascii = sep.only_ascii(); + StrCharSplitIterator { + string: *self, + position: 0, + sep: sep, + count: count, + allow_trailing_empty: allow_trailing_empty, + finished: false, + only_ascii: only_ascii + } + } + + fn line_iter(&self) -> StrCharSplitIterator<'self, char> { + self.split_options_iter('\n', self.len(), false) + } + fn word_iter(&self) -> WordIterator<'self> { + self.split_iter(char::is_whitespace).filter(|s| !s.is_empty()) + } + /// Returns true if one string ends with another #[inline] @@ -2637,18 +2629,6 @@ impl<'self> StrSlice<'self> for &'self str { fn slice(&self, begin: uint, end: uint) -> &'self str { slice(*self, begin, end) } - /// Splits a string into substrings using a character function - #[inline] - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool { - each_split(*self, sepfn, it) - } - /** - * Splits a string into substrings at each occurrence of a given character - */ - #[inline] - fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool { - each_split_char(*self, sep, it) - } /** * Splits a string into a vector of the substrings separated by a given * string @@ -2905,131 +2885,6 @@ mod tests { } #[test] - fn test_split_char() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', [~"", ~"hello", ~"there"]); - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there", ~""]); - - t("", 'z', [~""]); - t("z", 'z', [~"",~""]); - t("ok", 'z', [~"ok"]); - } - - #[test] - fn test_split_char_2() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); - } - - #[test] - fn test_splitn_char() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', 0u, [~"abc.hello.there"]); - t("abc.hello.there", '.', 1u, [~"abc", ~"hello.there"]); - t("abc.hello.there", '.', 2u, [~"abc", ~"hello", ~"there"]); - t("abc.hello.there", '.', 3u, [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', 0u, [~".hello.there"]); - t(".hello.there", '.', 1u, [~"", ~"hello.there"]); - t("...hello.there.", '.', 3u, [~"", ~"", ~"", ~"hello.there."]); - t("...hello.there.", '.', 5u, [~"", ~"", ~"", ~"hello", ~"there", ~""]); - - t("", 'z', 5u, [~""]); - t("z", 'z', 5u, [~"",~""]); - t("ok", 'z', 5u, [~"ok"]); - t("z", 'z', 0u, [~"z"]); - t("w.x.y", '.', 0u, [~"w.x.y"]); - t("w.x.y", '.', 1u, [~"w",~"x.y"]); - } - - #[test] - fn test_splitn_char_2() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", '华', 1u, [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYzWWWz", 'z', 3u, [~"", ~"", ~"XXX", ~"YYYzWWWz"]); - t("z", 'z', 5u, [~"",~""]); - t("", 'z', 5u, [~""]); - t("ok", 'z', 5u, [~"ok"]); - } - - #[test] - fn test_splitn_char_3() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', 1u, [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', 1u, [~"ประเ", ~"ศไทย中华Việt Nam"]); - } - - #[test] - fn test_split_char_no_trailing() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', [~"", ~"hello", ~"there"]); - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]); - - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]); - t("", 'z', []); - t("z", 'z', [~""]); - t("ok", 'z', [~"ok"]); - } - - #[test] - fn test_split_char_no_trailing_2() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); - } - - #[test] fn test_split_str() { fn t<'a>(s: &str, sep: &'a str, u: &[~str]) { let mut v = ~[]; @@ -3054,75 +2909,6 @@ mod tests { #[test] - fn test_split() { - fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { - let mut v = ~[]; - for each_split(s, sepf) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY", ~""]); - t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); - t("z", |cc| cc == 'z', [~"",~""]); - t("", |cc| cc == 'z', [~""]); - t("ok", |cc| cc == 'z', [~"ok"]); - } - - #[test] - fn test_split_no_trailing() { - fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { - let mut v = ~[]; - for each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY"]); - t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); - t("z", |cc| cc == 'z', [~""]); - t("", |cc| cc == 'z', []); - t("ok", |cc| cc == 'z', [~"ok"]); - } - - #[test] - fn test_lines() { - let lf = "\nMary had a little lamb\nLittle lamb\n"; - let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n"; - - fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) { - let mut v = ~[]; - for f(s) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t(lf, each_line, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t(lf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t(crlf, each_line, [~"\r", ~"Mary had a little lamb\r", ~"Little lamb\r"]); - t(crlf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t("", each_line, []); - t("", each_line_any, []); - t("\n", each_line, [~""]); - t("\n", each_line_any, [~""]); - t("banana", each_line, [~"banana"]); - t("banana", each_line_any, [~"banana"]); - } - - #[test] - fn test_words() { - fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) { - let mut v = ~[]; - for f(s) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "\nMary had a little lamb\nLittle lamb\n"; - - t(data, each_word, [~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]); - t("ok", each_word, [~"ok"]); - t("", each_word, []); - } - - #[test] fn test_split_within() { fn t(s: &str, i: uint, u: &[~str]) { let mut v = ~[]; @@ -3671,7 +3457,7 @@ mod tests { let string = "a\nb\nc"; let mut lines = ~[]; - for each_line(string) |line| { lines.push(line) } + for string.line_iter().advance |line| { lines.push(line) } assert_eq!(subslice_offset(string, lines[0]), 0); assert_eq!(subslice_offset(string, lines[1]), 2); assert_eq!(subslice_offset(string, lines[2]), 4); @@ -3731,78 +3517,6 @@ mod tests { } #[test] - fn test_split_char_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_split_char(data, ' ') |xx| { - match ii { - 0 => assert!("\nMary" == xx), - 1 => assert!("had" == xx), - 2 => assert!("a" == xx), - 3 => assert!("little" == xx), - _ => () - } - ii += 1; - } - } - - #[test] - fn test_splitn_char_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_splitn_char(data, ' ', 2u) |xx| { - match ii { - 0 => assert!("\nMary" == xx), - 1 => assert!("had" == xx), - 2 => assert!("a little lamb\nLittle lamb\n" == xx), - _ => () - } - ii += 1; - } - } - - #[test] - fn test_words_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_word(data) |ww| { - match ii { - 0 => assert!("Mary" == ww), - 1 => assert!("had" == ww), - 2 => assert!("a" == ww), - 3 => assert!("little" == ww), - _ => () - } - ii += 1; - } - - each_word("", |_x| fail!()); // should not fail - } - - #[test] - fn test_lines_each () { - let lf = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_line(lf) |x| { - match ii { - 0 => assert!("" == x), - 1 => assert!("Mary had a little lamb" == x), - 2 => assert!("Little lamb" == x), - _ => () - } - ii += 1; - } - } - - #[test] fn test_map() { assert_eq!(~"", map("", |c| unsafe {libc::toupper(c as c_char)} as char)); assert_eq!(~"YMCA", map("ymca", |c| unsafe {libc::toupper(c as c_char)} as char)); @@ -4015,4 +3729,68 @@ mod tests { assert_eq!(b, v[pos]); } } + + #[test] + fn test_split_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.split_iter(' ').collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let split: ~[&str] = data.split_iter('ä').collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + } + #[test] + fn test_splitn_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.splitn_iter(' ', 3).collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]); + + let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]); + + // Unicode + let split: ~[&str] = data.splitn_iter('ä', 3).collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]); + + let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]); + } + + #[test] + fn test_split_char_iterator_no_trailing() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect(); + assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]); + + let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect(); + assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]); + } + + #[test] + fn test_word_iter() { + let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n"; + let words: ~[&str] = data.word_iter().collect(); + assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"]) + } + + #[test] + fn test_line_iter() { + let data = "\nMäry häd ä little lämb\n\nLittle lämb\n"; + let lines: ~[&str] = data.line_iter().collect(); + assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]); + + let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n + let lines: ~[&str] = data.line_iter().collect(); + assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]); + } } |
