diff options
| author | Kevin Butler <haqkrs@gmail.com> | 2015-11-12 02:43:43 +0000 |
|---|---|---|
| committer | Kevin Butler <haqkrs@gmail.com> | 2016-01-16 00:57:12 +0000 |
| commit | 24578e0fe555f267bef40528b8ac79bc7e898007 (patch) | |
| tree | 336f724d68ee4dc447ced71e23636de956d16870 /src/libsyntax/parse/lexer | |
| parent | 9e3e43f3f6bb0d87da5f5b7fd92db0cc990e62a3 (diff) | |
| download | rust-24578e0fe555f267bef40528b8ac79bc7e898007.tar.gz rust-24578e0fe555f267bef40528b8ac79bc7e898007.zip | |
libsyntax: accept only whitespace with the PATTERN_WHITE_SPACE property
This aligns with Unicode recommendations and should be stable for all future Unicode releases. See http://unicode.org/reports/tr31/#R3. This renames `libsyntax::lexer::is_whitespace` to `is_pattern_whitespace`, so it potentially breaks users of libsyntax.
Diffstat (limited to 'src/libsyntax/parse/lexer')
| -rw-r--r-- | src/libsyntax/parse/lexer/comments.rs | 4 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 17 |
2 files changed, 12 insertions, 9 deletions
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index e336c98f03c..629edced804 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -15,7 +15,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos}; use errors; use parse::lexer::is_block_doc_comment; use parse::lexer::{StringReader, TokenAndSpan}; -use parse::lexer::{is_whitespace, Reader}; +use parse::lexer::{is_pattern_whitespace, Reader}; use parse::lexer; use print::pprust; use str::char_at; @@ -153,7 +153,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) { } fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) { - while is_whitespace(rdr.curr) && !rdr.is_eof() { + while is_pattern_whitespace(rdr.curr) && !rdr.is_eof() { if rdr.col == CharPos(0) && rdr.curr_is('\n') { push_blank_line_comment(rdr, &mut *comments); } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 3e61aaff3c9..88a876cac73 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -16,6 +16,7 @@ use ext::tt::transcribe::tt_next_token; use parse::token::str_to_ident; use parse::token; use str::char_at; +use rustc_unicode::property::Pattern_White_Space; use std::borrow::Cow; use std::char; @@ -546,10 +547,10 @@ impl<'a> StringReader<'a> { let c = self.scan_comment(); debug!("scanning a comment {:?}", c); c - } - c if is_whitespace(Some(c)) => { + }, + c if is_pattern_whitespace(Some(c)) => { let start_bpos = self.last_pos; - while is_whitespace(self.curr) { + while is_pattern_whitespace(self.curr) { self.bump(); } let c = Some(TokenAndSpan { @@ -1435,7 +1436,7 @@ impl<'a> StringReader<'a> { } fn consume_whitespace(&mut self) { - while is_whitespace(self.curr) && !self.is_eof() { + while is_pattern_whitespace(self.curr) && !self.is_eof() { self.bump(); } } @@ -1460,7 +1461,7 @@ impl<'a> StringReader<'a> { } fn 
consume_non_eol_whitespace(&mut self) { - while is_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() { + while is_pattern_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() { self.bump(); } } @@ -1591,8 +1592,10 @@ impl<'a> StringReader<'a> { } } -pub fn is_whitespace(c: Option<char>) -> bool { - c.map_or(false, char::is_whitespace) +// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which +// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3 +pub fn is_pattern_whitespace(c: Option<char>) -> bool { + c.map_or(false, Pattern_White_Space) } fn in_range(c: Option<char>, lo: char, hi: char) -> bool { |
