diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-09 23:10:50 +1000 |
|---|---|---|
| committer | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-10 23:02:54 +1000 |
| commit | 1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46 (patch) | |
| tree | 54f03a318e14bcdbdb56e01b3c80d00a9db87a17 | |
| parent | 2ff6b298c5f23f48aa993fced41b6e29e446b7ce (diff) | |
| download | rust-1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46.tar.gz rust-1e8982bdb26208d9d9ed4cdcbcd21cc9ef35bd46.zip | |
std: replace str::each_split* with an iterator
| -rw-r--r-- | src/compiletest/header.rs | 4 | ||||
| -rw-r--r-- | src/compiletest/runtest.rs | 19 | ||||
| -rw-r--r-- | src/libextra/fileinput.rs | 5 | ||||
| -rw-r--r-- | src/libextra/getopts.rs | 10 | ||||
| -rw-r--r-- | src/libextra/net_ip.rs | 15 | ||||
| -rw-r--r-- | src/libextra/net_url.rs | 2 | ||||
| -rw-r--r-- | src/libextra/num/rational.rs | 15 | ||||
| -rw-r--r-- | src/libextra/terminfo/parser/compiled.rs | 6 | ||||
| -rw-r--r-- | src/libextra/terminfo/searcher.rs | 5 | ||||
| -rw-r--r-- | src/librustc/driver/driver.rs | 16 | ||||
| -rw-r--r-- | src/librustc/metadata/cstore.rs | 3 | ||||
| -rw-r--r-- | src/librusti/rusti.rc | 6 | ||||
| -rw-r--r-- | src/librustpkg/path_util.rs | 3 | ||||
| -rw-r--r-- | src/librustpkg/rustpkg.rc | 7 | ||||
| -rw-r--r-- | src/libstd/os.rs | 6 | ||||
| -rw-r--r-- | src/libstd/path.rs | 60 | ||||
| -rw-r--r-- | src/libstd/str.rs | 586 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 11 | ||||
| -rw-r--r-- | src/test/bench/sudoku.rs | 5 |
19 files changed, 269 insertions, 515 deletions
diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index b147dc806e2..782571fc679 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -141,8 +141,8 @@ fn parse_check_line(line: &str) -> Option<~str> { fn parse_exec_env(line: &str) -> Option<(~str, ~str)> { do parse_name_value_directive(line, ~"exec-env").map |nv| { // nv is either FOO or FOO=BAR - let mut strs = ~[]; - for str::each_splitn_char(*nv, '=', 1u) |s| { strs.push(s.to_owned()); } + let mut strs: ~[~str] = nv.splitn_iter('=', 1).transform(|s| s.to_owned()).collect(); + match strs.len() { 1u => (strs.pop(), ~""), 2u => { diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index b4a20886ad8..444f4c8d539 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -278,7 +278,7 @@ fn run_debuginfo_test(config: &config, props: &TestProps, testfile: &Path) { // check if each line in props.check_lines appears in the // output (in order) let mut i = 0u; - for str::each_line(ProcRes.stdout) |line| { + for ProcRes.stdout.line_iter().advance |line| { if check_lines[i].trim() == line.trim() { i += 1u; } @@ -308,7 +308,7 @@ fn check_error_patterns(props: &TestProps, let mut next_err_idx = 0u; let mut next_err_pat = &props.error_patterns[next_err_idx]; let mut done = false; - for str::each_line(ProcRes.stderr) |line| { + for ProcRes.stderr.line_iter().advance |line| { if str::contains(line, *next_err_pat) { debug!("found error pattern %s", *next_err_pat); next_err_idx += 1u; @@ -358,7 +358,7 @@ fn check_expected_errors(expected_errors: ~[errors::ExpectedError], // filename:line1:col1: line2:col2: *warning:* msg // where line1:col1: is the starting point, line2:col2: // is the ending point, and * represents ANSI color codes. - for str::each_line(ProcRes.stderr) |line| { + for ProcRes.stderr.line_iter().advance |line| { let mut was_expected = false; for vec::eachi(expected_errors) |i, ee| { if !found_flags[i] { @@ -612,15 +612,11 @@ fn make_run_args(config: &config, _props: &TestProps, testfile: &Path) -> } fn split_maybe_args(argstr: &Option<~str>) -> ~[~str] { - fn rm_whitespace(v: ~[~str]) -> ~[~str] { - v.filtered(|s| !str::is_whitespace(*s)) - } - match *argstr { Some(ref s) => { - let mut ss = ~[]; - for str::each_split_char(*s, ' ') |s| { ss.push(s.to_owned()) } - rm_whitespace(ss) + s.split_iter(' ') + .filter_map(|s| if s.is_whitespace() {None} else {Some(s.to_owned())}) + .collect() } None => ~[] } @@ -739,8 +735,7 @@ fn _arm_exec_compiled_test(config: &config, props: &TestProps, let cmdline = make_cmdline("", args.prog, args.args); // get bare program string - let mut tvec = ~[]; - for str::each_split_char(args.prog, '/') |ts| { tvec.push(ts.to_owned()) } + let tvec: ~[~str] = args.prog.split_iter('/').transform(|ts| ts.to_owned()).collect(); let prog_short = tvec.pop(); // copy to target diff --git a/src/libextra/fileinput.rs b/src/libextra/fileinput.rs index 16082732715..add857ca9ed 100644 --- a/src/libextra/fileinput.rs +++ b/src/libextra/fileinput.rs @@ -416,7 +416,6 @@ mod test { use core::iterator::IteratorUtil; use core::io; - use core::str; use core::uint; use core::vec; @@ -527,9 +526,7 @@ mod test { } for input_vec_state(filenames) |line, state| { - let nums = do vec::build |p| { - for str::each_split_char(line, ' ') |s| { p(s.to_owned()); } - }; + let nums: ~[&str] = line.split_iter(' ').collect(); let file_num = uint::from_str(nums[0]).get(); let line_num = uint::from_str(nums[1]).get(); assert_eq!(line_num, state.line_num_file); diff --git a/src/libextra/getopts.rs b/src/libextra/getopts.rs index 2bc0a8ef6b0..af026473a1b 100644 --- a/src/libextra/getopts.rs +++ b/src/libextra/getopts.rs @@ -82,6 +82,7 @@ use core::prelude::*; +use core::iterator::IteratorUtil; use core::cmp::Eq; use core::result::{Err, Ok}; use core::result; @@ -247,14 +248,13 @@ pub fn getopts(args: &[~str], opts: &[Opt]) -> Result { let mut i_arg = None; if cur[1] == '-' as u8 { let tail = str::slice(cur, 2, curlen); - let mut tail_eq = ~[]; - for str::each_splitn_char(tail, '=', 1) |s| { tail_eq.push(s.to_owned()) } + let tail_eq: ~[&str] = tail.split_iter('=').collect(); if tail_eq.len() <= 1 { names = ~[Long(tail.to_owned())]; } else { names = - ~[Long(copy tail_eq[0])]; - i_arg = Some(copy tail_eq[1]); + ~[Long(tail_eq[0].to_owned())]; + i_arg = Some(tail_eq[1].to_owned()); } } else { let mut j = 1; @@ -635,7 +635,7 @@ pub mod groups { // Normalize desc to contain words separated by one space character let mut desc_normalized_whitespace = ~""; - for str::each_word(desc) |word| { + for desc.word_iter().advance |word| { desc_normalized_whitespace.push_str(word); desc_normalized_whitespace.push_char(' '); } diff --git a/src/libextra/net_ip.rs b/src/libextra/net_ip.rs index ddbf09e40eb..760fc7485e0 100644 --- a/src/libextra/net_ip.rs +++ b/src/libextra/net_ip.rs @@ -14,6 +14,7 @@ use core::prelude::*; +use core::iterator::IteratorUtil; use core::libc; use core::comm::{stream, SharedChan}; use core::ptr; @@ -158,9 +159,7 @@ pub mod v4 { use core::cast::transmute; use core::result; - use core::str; use core::uint; - use core::vec; /** * Convert a str to `ip_addr` @@ -199,14 +198,12 @@ pub mod v4 { } } pub fn parse_to_ipv4_rep(ip: &str) -> result::Result<Ipv4Rep, ~str> { - let mut parts = ~[]; - for str::each_split_char(ip, '.') |s| { parts.push(s.to_owned()) } - let parts = vec::map(parts, |s| { - match uint::from_str(*s) { - Some(n) if n <= 255 => n, - _ => 256 + let parts: ~[uint] = ip.split_iter('.').transform(|s| { + match uint::from_str(s) { + Some(n) if n <= 255 => n, + _ => 256 } - }); + }).collect(); if parts.len() != 4 { Err(fmt!("'%s' doesn't have 4 parts", ip)) } else if parts.contains(&256) { diff --git a/src/libextra/net_url.rs b/src/libextra/net_url.rs index f26019d9282..f7e86b00d23 100644 --- a/src/libextra/net_url.rs +++ b/src/libextra/net_url.rs @@ -334,7 +334,7 @@ fn userinfo_to_str(userinfo: &UserInfo) -> ~str { fn query_from_str(rawquery: &str) -> Query { let mut query: Query = ~[]; if str::len(rawquery) != 0 { - for str::each_split_char(rawquery, '&') |p| { + for rawquery.split_iter('&').advance |p| { let (k, v) = split_char_first(p, '='); query.push((decode_component(k), decode_component(v))); }; diff --git a/src/libextra/num/rational.rs b/src/libextra/num/rational.rs index 1a8ab75b3dd..08fbb8aacc9 100644 --- a/src/libextra/num/rational.rs +++ b/src/libextra/num/rational.rs @@ -12,11 +12,10 @@ use core::prelude::*; +use core::iterator::IteratorUtil; use core::cmp; use core::from_str::FromStr; use core::num::{Zero,One,ToStrRadix,FromStrRadix,Round}; -use core::str; -use core::vec; use super::bigint::BigInt; /// Represents the ratio between 2 numbers. @@ -252,11 +251,7 @@ impl<T: FromStr + Clone + Integer + Ord> FromStr for Ratio<T> { /// Parses `numer/denom`. fn from_str(s: &str) -> Option<Ratio<T>> { - let split = vec::build(|push| { - for str::each_splitn_char(s, '/', 1) |s| { - push(s.to_owned()); - } - }); + let split: ~[&str] = s.splitn_iter('/', 1).collect(); if split.len() < 2 { return None; } do FromStr::from_str::<T>(split[0]).chain |a| { do FromStr::from_str::<T>(split[1]).chain |b| { @@ -269,11 +264,7 @@ impl<T: FromStrRadix + Clone + Integer + Ord> FromStrRadix for Ratio<T> { /// Parses `numer/denom` where the numbers are in base `radix`. fn from_str_radix(s: &str, radix: uint) -> Option<Ratio<T>> { - let split = vec::build(|push| { - for str::each_splitn_char(s, '/', 1) |s| { - push(s.to_owned()); - } - }); + let split: ~[&str] = s.splitn_iter('/', 1).collect(); if split.len() < 2 { None } else { do FromStrRadix::from_str_radix::<T>(split[0], radix).chain |a| { diff --git a/src/libextra/terminfo/parser/compiled.rs b/src/libextra/terminfo/parser/compiled.rs index 61c68c27fe5..690596e6248 100644 --- a/src/libextra/terminfo/parser/compiled.rs +++ b/src/libextra/terminfo/parser/compiled.rs @@ -14,6 +14,7 @@ use core::prelude::*; use core::{vec, int, str}; use core::io::Reader; +use core::iterator::IteratorUtil; use core::hashmap::HashMap; use super::super::TermInfo; @@ -212,11 +213,8 @@ pub fn parse(file: @Reader, longnames: bool) -> Result<~TermInfo, ~str> { return Err(~"incompatible file: more string offsets than expected"); } - let mut term_names: ~[~str] = vec::with_capacity(2); let names_str = str::from_bytes(file.read_bytes(names_bytes as uint - 1)); // don't read NUL - for names_str.each_split_char('|') |s| { - term_names.push(s.to_owned()); - } + let term_names: ~[~str] = names_str.split_iter('|').transform(|s| s.to_owned()).collect(); file.read_byte(); // consume NUL diff --git a/src/libextra/terminfo/searcher.rs b/src/libextra/terminfo/searcher.rs index d6577cf3b94..d109bb12e02 100644 --- a/src/libextra/terminfo/searcher.rs +++ b/src/libextra/terminfo/searcher.rs @@ -12,9 +12,10 @@ /// Does not support hashed database, only filesystem! use core::prelude::*; -use core::{os, str}; +use core::{os}; use core::os::getenv; use core::io::{file_reader, Reader}; +use core::iterator::IteratorUtil; use path = core::path::Path; /// Return path to database entry for `term` @@ -36,7 +37,7 @@ pub fn get_dbpath_for_term(term: &str) -> Option<~path> { dirs_to_search.push(homedir.unwrap().push(".terminfo")); // ncurses compatability } match getenv("TERMINFO_DIRS") { - Some(dirs) => for str::each_split_char(dirs, ':') |i| { + Some(dirs) => for dirs.split_iter(':').advance |i| { if i == "" { dirs_to_search.push(path("/usr/share/terminfo")); } else { diff --git a/src/librustc/driver/driver.rs b/src/librustc/driver/driver.rs index 928f7d5a791..d765bcff9e2 100644 --- a/src/librustc/driver/driver.rs +++ b/src/librustc/driver/driver.rs @@ -24,6 +24,7 @@ use middle; use util::common::time; use util::ppaux; +use core::iterator::IteratorUtil; use core::hashmap::HashMap; use core::int; use core::io; @@ -684,11 +685,7 @@ pub fn build_session_options(binary: @~str, let addl_lib_search_paths = getopts::opt_strs(matches, "L").map(|s| Path(*s)); let linker = getopts::opt_maybe_str(matches, "linker"); let linker_args = getopts::opt_strs(matches, "link-args").flat_map( |a| { - let mut args = ~[]; - for str::each_split_char(*a, ' ') |arg| { - args.push(str::to_owned(arg)); - } - args + a.split_iter(' ').transform(|arg| arg.to_owned()).collect() }); let cfg = parse_cfgspecs(getopts::opt_strs(matches, "cfg"), demitter); @@ -699,12 +696,9 @@ pub fn build_session_options(binary: @~str, let custom_passes = match getopts::opt_maybe_str(matches, "passes") { None => ~[], Some(s) => { - let mut o = ~[]; - for s.each_split(|c| c == ' ' || c == ',') |s| { - let s = s.trim().to_owned(); - o.push(s); - } - o + s.split_iter(|c: char| c == ' ' || c == ',').transform(|s| { + s.trim().to_owned() + }).collect() } }; diff --git a/src/librustc/metadata/cstore.rs b/src/librustc/metadata/cstore.rs index 6cc01cb90b8..f0cabe50455 100644 --- a/src/librustc/metadata/cstore.rs +++ b/src/librustc/metadata/cstore.rs @@ -17,6 +17,7 @@ use core::prelude::*; use metadata::cstore; use metadata::decoder; +use core::iterator::IteratorUtil; use core::hashmap::HashMap; use core::vec; use extra; @@ -114,7 +115,7 @@ pub fn get_used_libraries(cstore: &CStore) -> ~[~str] { } pub fn add_used_link_args(cstore: &mut CStore, args: &str) { - for args.each_split_char(' ') |s| { + for args.split_iter(' ').advance |s| { cstore.used_link_args.push(s.to_owned()); } } diff --git a/src/librusti/rusti.rc b/src/librusti/rusti.rc index 88482c94336..6d4043bfc89 100644 --- a/src/librusti/rusti.rc +++ b/src/librusti/rusti.rc @@ -339,9 +339,11 @@ pub fn run_line(repl: &mut Repl, in: @io::Reader, out: @io::Writer, line: ~str, use_rl: bool) -> Option<Repl> { if line.starts_with(":") { + // FIXME #5898: conflicts with Cell.take(), so can't be at the top level + use core::iterator::IteratorUtil; + let full = line.substr(1, line.len() - 1); - let mut split = ~[]; - for str::each_word(full) |word| { split.push(word.to_owned()) } + let split: ~[~str] = full.word_iter().transform(|s| s.to_owned()).collect(); let len = split.len(); if len > 0 { diff --git a/src/librustpkg/path_util.rs b/src/librustpkg/path_util.rs index af6b23ba755..c039eeacbe4 100644 --- a/src/librustpkg/path_util.rs +++ b/src/librustpkg/path_util.rs @@ -17,6 +17,7 @@ pub use target::{OutputType, Main, Lib, Test, Bench, Target, Build, Install}; use core::libc::consts::os::posix88::{S_IRUSR, S_IWUSR, S_IXUSR}; use core::os::mkdir_recursive; use core::os; +use core::iterator::IteratorUtil; /// Returns the value of RUST_PATH, as a list /// of Paths. In general this should be read from the @@ -166,7 +167,7 @@ fn library_in_workspace(full_name: &str, short_name: &str, where: Target, let f_name = match p_path.filename() { Some(s) => s, None => loop }; - for f_name.each_split_char('-') |piece| { + for f_name.split_iter('-').advance |piece| { debug!("a piece = %s", piece); if which == 0 && piece != lib_prefix { break; diff --git a/src/librustpkg/rustpkg.rc b/src/librustpkg/rustpkg.rc index 37b8c2ad433..2db51fe969f 100644 --- a/src/librustpkg/rustpkg.rc +++ b/src/librustpkg/rustpkg.rc @@ -29,6 +29,7 @@ extern mod syntax; use core::prelude::*; use core::*; +use core::iterator::IteratorUtil; pub use core::path::Path; use core::hashmap::HashMap; use rustc::driver::{driver, session}; @@ -161,10 +162,8 @@ impl<'self> PkgScript<'self> { exe.to_str(), root.to_str(), "configs"); let output = run::process_output(exe.to_str(), [root.to_str(), ~"configs"]); // Run the configs() function to get the configs - let mut cfgs = ~[]; - for str::each_word(str::from_bytes(output.output)) |w| { - cfgs.push(w.to_owned()); - } + let cfgs = str::from_bytes_slice(output.output).word_iter() + .transform(|w| w.to_owned()).collect(); (cfgs, output.status) } } diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 6ce6d81bf24..2069e61f11e 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -30,6 +30,7 @@ use cast; use io; +use iterator::IteratorUtil; use libc; use libc::{c_char, c_void, c_int, size_t}; use libc::{mode_t, FILE}; @@ -224,12 +225,11 @@ pub fn env() -> ~[(~str,~str)] { fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] { let mut pairs = ~[]; for input.each |p| { - let mut vs = ~[]; - for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) } + let vs: ~[&str] = p.splitn_iter('=', 1).collect(); debug!("splitting: len: %u", vs.len()); assert_eq!(vs.len(), 2); - pairs.push((copy vs[0], copy vs[1])); + pairs.push((vs[0].to_owned(), vs[1].to_owned())); } pairs } diff --git a/src/libstd/path.rs b/src/libstd/path.rs index a551b9bf3c0..b2f25d41157 100644 --- a/src/libstd/path.rs +++ b/src/libstd/path.rs @@ -18,6 +18,7 @@ Cross-platform file path handling use container::Container; use cmp::Eq; +use iterator::IteratorUtil; use libc; use option::{None, Option, Some}; use str; @@ -449,10 +450,9 @@ impl ToStr for PosixPath { // PosixPath and WindowsPath, most of their methods are common. impl GenericPath for PosixPath { fn from_str(s: &str) -> PosixPath { - let mut components = ~[]; - for str::each_split_nonempty(s, |c| c == '/') |s| { - components.push(s.to_owned()) - } + let components = s.split_iter('/') + .filter_map(|s| if s.is_empty() {None} else {Some(s.to_owned())}) + .collect(); let is_absolute = (s.len() != 0 && s[0] == '/' as u8); PosixPath { is_absolute: is_absolute, @@ -508,7 +508,7 @@ impl GenericPath for PosixPath { } fn with_filename(&self, f: &str) -> PosixPath { - assert!(! str::any(f, |c| windows::is_sep(c as u8))); + assert!(! str::any(f, |c| windows::is_sep(c))); self.dir_path().push(f) } @@ -569,11 +569,11 @@ impl GenericPath for PosixPath { fn push_many(&self, cs: &[~str]) -> PosixPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = ~[]; - for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for e.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); } PosixPath { is_absolute: self.is_absolute, @@ -583,11 +583,11 @@ impl GenericPath for PosixPath { fn push(&self, s: &str) -> PosixPath { let mut v = copy self.components; - let mut ss = ~[]; - for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for s.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); PosixPath { components: v, ..copy *self } } @@ -661,11 +661,11 @@ impl GenericPath for WindowsPath { } } - let mut components = ~[]; - for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| { - components.push(s.to_owned()) - } - let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0])); + let components = rest.split_iter(windows::is_sep) + .filter_map(|s| if s.is_empty() {None} else {Some(s.to_owned())}) + .collect(); + + let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0] as char)); WindowsPath { host: host, device: device, @@ -722,7 +722,7 @@ impl GenericPath for WindowsPath { } fn with_filename(&self, f: &str) -> WindowsPath { - assert!(! str::any(f, |c| windows::is_sep(c as u8))); + assert!(! str::any(f, |c| windows::is_sep(c))); self.dir_path().push(f) } @@ -826,11 +826,11 @@ impl GenericPath for WindowsPath { fn push_many(&self, cs: &[~str]) -> WindowsPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = ~[]; - for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for e.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); } // tedious, but as-is, we can't use ..self WindowsPath { @@ -843,11 +843,11 @@ impl GenericPath for WindowsPath { fn push(&self, s: &str) -> WindowsPath { let mut v = copy self.components; - let mut ss = ~[]; - for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { - ss.push(s.to_owned()) + for s.split_iter(windows::is_sep).advance |s| { + if !s.is_empty() { + v.push(s.to_owned()) + } } - v.push_all_move(ss); WindowsPath { components: v, ..copy *self } } @@ -905,8 +905,8 @@ pub mod windows { use option::{None, Option, Some}; #[inline(always)] - pub fn is_sep(u: u8) -> bool { - u == '/' as u8 || u == '\\' as u8 + pub fn is_sep(u: char) -> bool { + u == '/' || u == '\\' } pub fn extract_unc_prefix(s: &str) -> Option<(~str,~str)> { @@ -915,7 +915,7 @@ pub mod windows { s[0] == s[1]) { let mut i = 2; while i < s.len() { - if is_sep(s[i]) { + if is_sep(s[i] as char) { let pre = s.slice(2, i).to_owned(); let rest = s.slice(i, s.len()).to_owned(); return Some((pre, rest)); diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 2af300fc1b8..930026fa4f7 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -25,7 +25,7 @@ use clone::Clone; use cmp::{TotalOrd, Ordering, Less, Equal, Greater}; use container::Container; use iter::Times; -use iterator::{Iterator, IteratorUtil}; +use iterator::{Iterator, IteratorUtil, FilterIterator}; use libc; use option::{None, Option, Some}; use old_iter::{BaseIter, EqIter}; @@ -633,128 +633,92 @@ pub fn slice<'a>(s: &'a str, begin: uint, end: uint) -> &'a str { unsafe { raw::slice_bytes(s, begin, end) } } -/// Splits a string into substrings at each occurrence of a given character -pub fn each_split_char<'a>(s: &'a str, sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), true, true, it) -} +/// An iterator over the substrings of a string, separated by `sep`. +pub struct StrCharSplitIterator<'self,Sep> { + priv string: &'self str, + priv position: uint, + priv sep: Sep, + /// The number of splits remaining + priv count: uint, + /// Whether an empty string at the end is allowed + priv allow_trailing_empty: bool, + priv finished: bool, + priv only_ascii: bool +} + +/// An iterator over the words of a string, separated by an sequence of whitespace +pub type WordIterator<'self> = + FilterIterator<'self, &'self str, + StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>; + +/// A separator for splitting a string character-wise +pub trait StrCharSplitSeparator { + /// Determine if the splitter should split at the given character + fn should_split(&self, char) -> bool; + /// Indicate if the splitter only uses ASCII characters, which + /// allows for a faster implementation. + fn only_ascii(&self) -> bool; +} +impl StrCharSplitSeparator for char { + #[inline(always)] + fn should_split(&self, c: char) -> bool { *self == c } -/// Like `each_split_char`, but a trailing empty string is omitted -pub fn each_split_char_no_trailing<'a>(s: &'a str, - sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), true, false, it) + fn only_ascii(&self) -> bool { (*self as uint) < 128 } } +impl<'self> StrCharSplitSeparator for &'self fn(char) -> bool { + #[inline(always)] + fn should_split(&self, c: char) -> bool { (*self)(c) } -/** - * Splits a string into substrings at each occurrence of a given - * character up to 'count' times. - * - * The character must be a valid UTF-8/ASCII character - */ -pub fn each_splitn_char<'a>(s: &'a str, - sep: char, - count: uint, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, count, true, true, it) + fn only_ascii(&self) -> bool { false } } +impl<'self> StrCharSplitSeparator for extern "Rust" fn(char) -> bool { + #[inline(always)] + fn should_split(&self, c: char) -> bool { (*self)(c) } -/// Like `each_split_char`, but omits empty strings -pub fn each_split_char_nonempty<'a>(s: &'a str, - sep: char, - it: &fn(&'a str) -> bool) -> bool { - each_split_char_inner(s, sep, len(s), false, false, it) + fn only_ascii(&self) -> bool { false } } -fn each_split_char_inner<'a>(s: &'a str, - sep: char, - count: uint, - allow_empty: bool, - allow_trailing_empty: bool, - it: &fn(&'a str) -> bool) -> bool { - if sep < 128u as char { - let (b, l) = (sep as u8, len(s)); - let mut done = 0u; - let mut (i, start) = (0u, 0u); - while i < l && done < count { - if s[i] == b { - if allow_empty || start < i { - if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { - return false; - } - } - start = i + 1u; - done += 1u; - } - i += 1u; - } - // only slice a non-empty trailing substring - if allow_trailing_empty || start < l { - if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; } - } - return true; - } - return each_split_inner(s, |cur| cur == sep, count, - allow_empty, allow_trailing_empty, it) -} +impl<'self, Sep: StrCharSplitSeparator> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> { + fn next(&mut self) -> Option<&'self str> { + if self.finished { return None } -/// Splits a string into substrings using a character function -pub fn each_split<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), true, true, it) -} + let l = self.string.len(); + let start = self.position; -/// Like `each_split`, but a trailing empty string is omitted -pub fn each_split_no_trailing<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), true, false, it) -} + if self.only_ascii { + // this gives a *huge* speed up for splitting on ASCII + // characters (e.g. '\n' or ' ') + while self.position < l && self.count > 0 { + let byte = self.string[self.position]; -/** - * Splits a string into substrings using a character function, cutting at - * most `count` times. - */ -pub fn each_splitn<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - count: uint, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, count, true, true, it) -} - -/// Like `each_split`, but omits empty strings -pub fn each_split_nonempty<'a>(s: &'a str, - sepfn: &fn(char) -> bool, - it: &fn(&'a str) -> bool) -> bool { - each_split_inner(s, sepfn, len(s), false, false, it) -} - -fn each_split_inner<'a>(s: &'a str, - sepfn: &fn(cc: char) -> bool, - count: uint, - allow_empty: bool, - allow_trailing_empty: bool, - it: &fn(&'a str) -> bool) -> bool { - let l = len(s); - let mut (i, start, done) = (0u, 0u, 0u); - while i < l && done < count { - let CharRange {ch, next} = char_range_at(s, i); - if sepfn(ch) { - if allow_empty || start < i { - if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { - return false; + if self.sep.should_split(byte as char) { + let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; + self.position += 1; + self.count -= 1; + return Some(slice); } + self.position += 1; + } + } else { + while self.position < l && self.count > 0 { + let CharRange {ch, next} = char_range_at(self.string, self.position); + + if self.sep.should_split(ch) { + let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; + self.position = next; + self.count -= 1; + return Some(slice); + } + self.position = next; } - start = next; - done += 1u; } - i = next; - } - if allow_trailing_empty || start < l { - if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; } + self.finished = true; + if self.allow_trailing_empty || start < l { + Some(unsafe { raw::slice_bytes(self.string, start, l) }) + } else { + None + } } - return true; } // See Issue #1932 for why this is a naive search @@ -876,18 +840,11 @@ pub fn levdistance(s: &str, t: &str) -> uint { } /** - * Splits a string into substrings separated by LF ('\n'). - */ -pub fn each_line<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - each_split_char_no_trailing(s, '\n', it) -} - -/** * Splits a string into substrings separated by LF ('\n') * and/or CR LF ("\r\n") */ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - for each_line(s) |s| { + for s.line_iter().advance |s| { let l = s.len(); if l > 0u && s[l - 1u] == '\r' as u8 { if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return false; } @@ -898,11 +855,6 @@ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { return true; } -/// Splits a string into substrings separated by whitespace -pub fn each_word<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool { - each_split_nonempty(s, char::is_whitespace, it) -} - /** Splits a string into substrings with possibly internal whitespace, * each of them at most `lim` bytes long. The substrings have leading and trailing * whitespace removed, and are only cut at whitespace boundaries. @@ -2216,7 +2168,7 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T { * ~~~ {.rust} * let string = "a\nb\nc"; * let mut lines = ~[]; - * for each_line(string) |line| { lines.push(line) } + * for string.line_iter().advance |line| { lines.push(line) } * * assert!(subslice_offset(string, lines[0]) == 0); // &"a" * assert!(subslice_offset(string, lines[1]) == 2); // &"b" @@ -2523,6 +2475,18 @@ pub trait StrSlice<'self> { fn rev_iter(&self) -> StrCharRevIterator<'self>; fn bytes_iter(&self) -> StrBytesIterator<'self>; fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>; + fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>; + fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint) + -> StrCharSplitIterator<'self, Sep>; + fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, + count: uint, allow_trailing_empty: bool) + -> StrCharSplitIterator<'self, Sep>; + /// An iterator over the lines of a string (subsequences separated + /// by `\n`). + fn line_iter(&self) -> StrCharSplitIterator<'self, char>; + /// An iterator over the words of a string (subsequences separated + /// by any sequence of whitespace). + fn word_iter(&self) -> WordIterator<'self>; fn ends_with(&self, needle: &str) -> bool; fn is_empty(&self) -> bool; fn is_whitespace(&self) -> bool; @@ -2530,8 +2494,6 @@ pub trait StrSlice<'self> { fn len(&self) -> uint; fn char_len(&self) -> uint; fn slice(&self, begin: uint, end: uint) -> &'self str; - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool; - fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool; fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool; fn starts_with<'a>(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; @@ -2597,6 +2559,36 @@ impl<'self> StrSlice<'self> for &'self str { StrBytesRevIterator { it: as_bytes_slice(*self).rev_iter() } } + fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> { + self.split_options_iter(sep, self.len(), true) + } + + fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint) + -> StrCharSplitIterator<'self, Sep> { + self.split_options_iter(sep, count, true) + } + fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, + count: uint, allow_trailing_empty: bool) + -> StrCharSplitIterator<'self, Sep> { + let only_ascii = sep.only_ascii(); + StrCharSplitIterator { + string: *self, + position: 0, + sep: sep, + count: count, + allow_trailing_empty: allow_trailing_empty, + finished: false, + only_ascii: only_ascii + } + } + + fn line_iter(&self) -> StrCharSplitIterator<'self, char> { + self.split_options_iter('\n', self.len(), false) + } + fn word_iter(&self) -> WordIterator<'self> { + self.split_iter(char::is_whitespace).filter(|s| !s.is_empty()) + } + /// Returns true if one string ends with another #[inline] @@ -2637,18 +2629,6 @@ impl<'self> StrSlice<'self> for &'self str { fn slice(&self, begin: uint, end: uint) -> &'self str { slice(*self, begin, end) } - /// Splits a string into substrings using a character function - #[inline] - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool { - each_split(*self, sepfn, it) - } - /** - * Splits a string into substrings at each occurrence of a given character - */ - #[inline] - fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool { - each_split_char(*self, sep, it) - } /** * Splits a string into a vector of the substrings separated by a given * string @@ -2905,131 +2885,6 @@ mod tests { } #[test] - fn test_split_char() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', [~"", ~"hello", ~"there"]); - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there", ~""]); - - t("", 'z', [~""]); - t("z", 'z', [~"",~""]); - t("ok", 'z', [~"ok"]); - } - - #[test] - fn test_split_char_2() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); - } - - #[test] - fn test_splitn_char() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', 0u, [~"abc.hello.there"]); - t("abc.hello.there", '.', 1u, [~"abc", ~"hello.there"]); - t("abc.hello.there", '.', 2u, [~"abc", ~"hello", ~"there"]); - t("abc.hello.there", '.', 3u, [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', 0u, [~".hello.there"]); - t(".hello.there", '.', 1u, [~"", ~"hello.there"]); - t("...hello.there.", '.', 3u, [~"", ~"", ~"", ~"hello.there."]); - t("...hello.there.", '.', 5u, [~"", ~"", ~"", ~"hello", ~"there", ~""]); - - t("", 'z', 5u, [~""]); - t("z", 'z', 5u, [~"",~""]); - t("ok", 'z', 5u, [~"ok"]); - t("z", 'z', 0u, [~"z"]); - t("w.x.y", '.', 0u, [~"w.x.y"]); - t("w.x.y", '.', 1u, [~"w",~"x.y"]); - } - - #[test] - fn test_splitn_char_2() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", '华', 1u, [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYzWWWz", 'z', 3u, [~"", ~"", ~"XXX", ~"YYYzWWWz"]); - t("z", 'z', 5u, [~"",~""]); - t("", 'z', 5u, [~""]); - t("ok", 'z', 5u, [~"ok"]); - } - - #[test] - fn test_splitn_char_3() { - fn t(s: &str, c: char, n: uint, u: &[~str]) { - debug!("splitn_byte: %?", s); - let mut v = ~[]; - for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - debug!("comparing vs. %?", u); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', 1u, [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', 1u, [~"ประเ", ~"ศไทย中华Việt Nam"]); - } - - #[test] - fn test_split_char_no_trailing() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]); - t(".hello.there", '.', [~"", ~"hello", ~"there"]); - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]); - - t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]); - t("", 'z', []); - t("z", 'z', [~""]); - t("ok", 'z', [~"ok"]); - } - - #[test] - fn test_split_char_no_trailing_2() { - fn t(s: &str, c: char, u: &[~str]) { - debug!("split_byte: %?", s); - let mut v = ~[]; - for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } - debug!("split_byte to: %?", v); - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "ประเทศไทย中华Việt Nam"; - t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]); - t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); - } - - #[test] fn test_split_str() { fn t<'a>(s: &str, sep: &'a str, u: &[~str]) { let mut v = ~[]; @@ -3054,75 +2909,6 @@ mod tests { #[test] - fn test_split() { - fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { - let mut v = ~[]; - for each_split(s, sepf) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY", ~""]); - t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); - t("z", |cc| cc == 'z', [~"",~""]); - t("", |cc| cc == 'z', [~""]); - t("ok", |cc| cc == 'z', [~"ok"]); - } - - #[test] - fn test_split_no_trailing() { - fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { - let mut v = ~[]; - for each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]); - t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY"]); - t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); - t("z", |cc| cc == 'z', [~""]); - t("", |cc| cc == 'z', []); - t("ok", |cc| cc == 'z', [~"ok"]); - } - - #[test] - fn test_lines() { - let lf = "\nMary had a little lamb\nLittle lamb\n"; - let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n"; - - fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) { - let mut v = ~[]; - for f(s) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - - t(lf, each_line, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t(lf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t(crlf, each_line, [~"\r", ~"Mary had a little lamb\r", ~"Little lamb\r"]); - t(crlf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]); - t("", each_line, []); - t("", each_line_any, []); - t("\n", each_line, [~""]); - t("\n", each_line_any, [~""]); - t("banana", each_line, [~"banana"]); - t("banana", each_line_any, [~"banana"]); - } - - #[test] - fn test_words() { - fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) { - let mut v = ~[]; - for f(s) |s| { v.push(s.to_owned()) } - assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b)); - } - let data = "\nMary had a little lamb\nLittle lamb\n"; - - t(data, each_word, [~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]); - t("ok", each_word, [~"ok"]); - t("", each_word, []); - } - - #[test] fn test_split_within() { fn t(s: &str, i: uint, u: &[~str]) { let mut v = ~[]; @@ -3671,7 +3457,7 @@ mod tests { let string = "a\nb\nc"; let mut lines = ~[]; - for each_line(string) |line| { lines.push(line) } + for string.line_iter().advance |line| { lines.push(line) } assert_eq!(subslice_offset(string, lines[0]), 0); assert_eq!(subslice_offset(string, lines[1]), 2); assert_eq!(subslice_offset(string, lines[2]), 4); @@ -3731,78 +3517,6 @@ mod tests { } #[test] - fn test_split_char_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_split_char(data, ' ') |xx| { - match ii { - 0 => assert!("\nMary" == xx), - 1 => assert!("had" == xx), - 2 => assert!("a" == xx), - 3 => assert!("little" == xx), - _ => () - } - ii += 1; - } - } - - #[test] - fn test_splitn_char_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_splitn_char(data, ' ', 2u) |xx| { - match ii { - 0 => assert!("\nMary" == xx), - 1 => assert!("had" == xx), - 2 => assert!("a little lamb\nLittle lamb\n" == xx), - _ => () - } - ii += 1; - } - } - - #[test] - fn test_words_each() { - let data = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_word(data) |ww| { - match ii { - 0 => assert!("Mary" == ww), - 1 => assert!("had" == ww), - 2 => assert!("a" == ww), - 3 => assert!("little" == ww), - _ => () - } - ii += 1; - } - - each_word("", |_x| fail!()); // should not fail - } - - #[test] - fn test_lines_each () { - let lf = "\nMary had a little lamb\nLittle lamb\n"; - - let mut ii = 0; - - for each_line(lf) |x| { - match ii { - 0 => assert!("" == x), - 1 => assert!("Mary had a little lamb" == x), - 2 => assert!("Little lamb" == x), - _ => () - } - ii += 1; - } - } - - #[test] fn test_map() { assert_eq!(~"", map("", |c| unsafe {libc::toupper(c as c_char)} as char)); assert_eq!(~"YMCA", map("ymca", |c| unsafe {libc::toupper(c as c_char)} as char)); @@ -4015,4 +3729,68 @@ mod tests { assert_eq!(b, v[pos]); } } + + #[test] + fn test_split_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.split_iter(' ').collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let split: ~[&str] = data.split_iter('ä').collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + } + #[test] + fn test_splitn_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.splitn_iter(' ', 3).collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]); + + let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect(); + assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]); + + // Unicode + let split: ~[&str] = data.splitn_iter('ä', 3).collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]); + + let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect(); + assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]); + } + + #[test] + fn test_split_char_iterator_no_trailing() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect(); + assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]); + + let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect(); + assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]); + } + + #[test] + fn test_word_iter() { + let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n"; + let words: ~[&str] = data.word_iter().collect(); + assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"]) + } + + #[test] + fn test_line_iter() { + let data = "\nMäry häd ä little lämb\n\nLittle lämb\n"; + let lines: ~[&str] = data.line_iter().collect(); + assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]); + + let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n + let lines: ~[&str] = data.line_iter().collect(); + assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]); + } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 59db35201f1..3ff894c267b 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -92,6 +92,7 @@ use parse::{new_sub_parser_from_file, next_node_id, ParseSess}; use opt_vec; use opt_vec::OptVec; +use core::iterator::IteratorUtil; use core::either::Either; use core::either; use core::hashmap::HashSet; @@ -3981,17 +3982,15 @@ impl Parser { token::LIT_STR(s) => { self.bump(); let the_string = ident_to_str(&s); - let mut words = ~[]; - for str::each_word(*the_string) |s| { words.push(s) } let mut abis = AbiSet::empty(); - for words.each |word| { - match abi::lookup(*word) { + for the_string.word_iter().advance |word| { + match abi::lookup(word) { Some(abi) => { if abis.contains(abi) { self.span_err( *self.span, fmt!("ABI `%s` appears twice", - *word)); + word)); } else { abis.add(abi); } @@ -4006,7 +4005,7 @@ impl Parser { str::connect_slices( abi::all_names(), ", "), - *word)); + word)); } } } diff --git a/src/test/bench/sudoku.rs b/src/test/bench/sudoku.rs index 3bf08cfb934..d05edef25a6 100644 --- a/src/test/bench/sudoku.rs +++ b/src/test/bench/sudoku.rs @@ -14,6 +14,7 @@ extern mod extra; use std::io::{ReaderUtil, WriterUtil}; use std::io; +use std::iterator::IteratorUtil; use std::os; use std::str; use std::u8; @@ -73,8 +74,8 @@ impl Sudoku { let mut g = vec::from_fn(10u, { |_i| ~[0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }); while !reader.eof() { let line = reader.read_line(); - let mut comps = ~[]; - for str::each_split_char(line.trim(), ',') |s| { comps.push(s.to_owned()) } + let comps: ~[&str] = line.trim().split_iter(',').collect(); + if comps.len() == 3u { let row = uint::from_str(comps[0]).get() as u8; let col = uint::from_str(comps[1]).get() as u8; |
