diff options
| author | Kevin Butler <haqkrs@gmail.com> | 2015-11-12 02:43:43 +0000 |
|---|---|---|
| committer | Kevin Butler <haqkrs@gmail.com> | 2016-01-16 00:57:12 +0000 |
| commit | 24578e0fe555f267bef40528b8ac79bc7e898007 (patch) | |
| tree | 336f724d68ee4dc447ced71e23636de956d16870 /src/libsyntax/util/parser_testing.rs | |
| parent | 9e3e43f3f6bb0d87da5f5b7fd92db0cc990e62a3 (diff) | |
| download | rust-24578e0fe555f267bef40528b8ac79bc7e898007.tar.gz rust-24578e0fe555f267bef40528b8ac79bc7e898007.zip | |
libsyntax: accept only whitespace with the Pattern_White_Space property
This aligns with Unicode recommendations and should be stable for all future Unicode releases. See http://unicode.org/reports/tr31/#R3. This renames `libsyntax::lexer::is_whitespace` to `is_pattern_whitespace`, so it potentially breaks users of libsyntax.
Diffstat (limited to 'src/libsyntax/util/parser_testing.rs')
| -rw-r--r-- | src/libsyntax/util/parser_testing.rs | 39 |
1 files changed, 19 insertions, 20 deletions
diff --git a/src/libsyntax/util/parser_testing.rs b/src/libsyntax/util/parser_testing.rs index c19033f2347..a78950e959f 100644 --- a/src/libsyntax/util/parser_testing.rs +++ b/src/libsyntax/util/parser_testing.rs @@ -10,7 +10,7 @@ use ast; use parse::{ParseSess,PResult,filemap_to_tts}; -use parse::new_parser_from_source_str; +use parse::{lexer, new_parser_from_source_str}; use parse::parser::Parser; use parse::token; use ptr::P; @@ -97,8 +97,8 @@ pub fn matches_codepattern(a : &str, b : &str) -> bool { let (a, b) = match (a_iter.peek(), b_iter.peek()) { (None, None) => return true, (None, _) => return false, - (Some(a), None) => { - if a.is_whitespace() { + (Some(&a), None) => { + if is_pattern_whitespace(a) { break // trailing whitespace check is out of loop for borrowck } else { return false @@ -107,11 +107,11 @@ pub fn matches_codepattern(a : &str, b : &str) -> bool { (Some(&a), Some(&b)) => (a, b) }; - if a.is_whitespace() && b.is_whitespace() { + if is_pattern_whitespace(a) && is_pattern_whitespace(b) { // skip whitespace for a and b scan_for_non_ws_or_end(&mut a_iter); scan_for_non_ws_or_end(&mut b_iter); - } else if a.is_whitespace() { + } else if is_pattern_whitespace(a) { // skip whitespace for a scan_for_non_ws_or_end(&mut a_iter); } else if a == b { @@ -123,23 +123,18 @@ pub fn matches_codepattern(a : &str, b : &str) -> bool { } // check if a has *only* trailing whitespace - a_iter.all(|c| c.is_whitespace()) + a_iter.all(is_pattern_whitespace) } /// Advances the given peekable `Iterator` until it reaches a non-whitespace character fn scan_for_non_ws_or_end<I: Iterator<Item= char>>(iter: &mut Peekable<I>) { - loop { - match iter.peek() { - Some(c) if c.is_whitespace() => {} // fall through; borrowck - _ => return - } - + while lexer::is_pattern_whitespace(iter.peek().cloned()) { iter.next(); } } -pub fn is_whitespace(c: char) -> bool { - c.is_whitespace() +pub fn is_pattern_whitespace(c: char) -> bool { + lexer::is_pattern_whitespace(Some(c)) } 
#[cfg(test)] @@ -162,14 +157,18 @@ mod tests { } #[test] - fn more_whitespace() { + fn pattern_whitespace() { assert_eq!(matches_codepattern("","\x0C"), false); - assert_eq!(matches_codepattern("a b","a\u{2002}b"),true); assert_eq!(matches_codepattern("a b ","a \u{0085}\n\t\r b"),true); assert_eq!(matches_codepattern("a b","a \u{0085}\n\t\r b "),false); - assert_eq!(matches_codepattern("a b","a\u{2002}b"),true); - assert_eq!(matches_codepattern("ab","a\u{2003}b"),false); - assert_eq!(matches_codepattern("a \u{3000}b","ab"),true); - assert_eq!(matches_codepattern("\u{205F}a b","ab"),true); + } + + #[test] + fn non_pattern_whitespace() { + // These have the property 'White_Space' but not 'Pattern_White_Space' + assert_eq!(matches_codepattern("a b","a\u{2002}b"), false); + assert_eq!(matches_codepattern("a b","a\u{2002}b"), false); + assert_eq!(matches_codepattern("\u{205F}a b","ab"), false); + assert_eq!(matches_codepattern("a \u{3000}b","ab"), false); } } |
