about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-03-07 20:06:17 -0800
committerbors <bors@rust-lang.org>2016-03-07 20:06:17 -0800
commit8b7c3f20e83de20518abb879f7a0e41b6e93414f (patch)
treed04a59bc3bfb143972d35059a2da9b8052e13f86 /src/libsyntax/parse
parent4352a8554f3765b98d27c1a0b1a170414f82b199 (diff)
parent24578e0fe555f267bef40528b8ac79bc7e898007 (diff)
downloadrust-8b7c3f20e83de20518abb879f7a0e41b6e93414f.tar.gz
rust-8b7c3f20e83de20518abb879f7a0e41b6e93414f.zip
Auto merge of #29734 - Ryman:whitespace_consistency, r=Aatch
libsyntax: be more accepting of whitespace in lexer

Fixes #29590.

Perhaps this may need more thorough testing?

r? @Aatch
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r--src/libsyntax/parse/lexer/comments.rs4
-rw-r--r--src/libsyntax/parse/lexer/mod.rs20
2 files changed, 12 insertions, 12 deletions
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index e336c98f03c..629edced804 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -15,7 +15,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos};
 use errors;
 use parse::lexer::is_block_doc_comment;
 use parse::lexer::{StringReader, TokenAndSpan};
-use parse::lexer::{is_whitespace, Reader};
+use parse::lexer::{is_pattern_whitespace, Reader};
 use parse::lexer;
 use print::pprust;
 use str::char_at;
@@ -153,7 +153,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 }
 
 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) {
-    while is_whitespace(rdr.curr) && !rdr.is_eof() {
+    while is_pattern_whitespace(rdr.curr) && !rdr.is_eof() {
         if rdr.col == CharPos(0) && rdr.curr_is('\n') {
             push_blank_line_comment(rdr, &mut *comments);
         }
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 3183dfbd954..a5cb5c7117e 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -16,6 +16,7 @@ use ext::tt::transcribe::tt_next_token;
 use parse::token::str_to_ident;
 use parse::token;
 use str::char_at;
+use rustc_unicode::property::Pattern_White_Space;
 
 use std::borrow::Cow;
 use std::char;
@@ -546,10 +547,10 @@ impl<'a> StringReader<'a> {
                 let c = self.scan_comment();
                 debug!("scanning a comment {:?}", c);
                 c
-            }
-            c if is_whitespace(Some(c)) => {
+            },
+            c if is_pattern_whitespace(Some(c)) => {
                 let start_bpos = self.last_pos;
-                while is_whitespace(self.curr) {
+                while is_pattern_whitespace(self.curr) {
                     self.bump();
                 }
                 let c = Some(TokenAndSpan {
@@ -1440,7 +1441,7 @@ impl<'a> StringReader<'a> {
     }
 
     fn consume_whitespace(&mut self) {
-        while is_whitespace(self.curr) && !self.is_eof() {
+        while is_pattern_whitespace(self.curr) && !self.is_eof() {
             self.bump();
         }
     }
@@ -1465,7 +1466,7 @@ impl<'a> StringReader<'a> {
     }
 
     fn consume_non_eol_whitespace(&mut self) {
-        while is_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() {
+        while is_pattern_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() {
             self.bump();
         }
     }
@@ -1596,11 +1597,10 @@ impl<'a> StringReader<'a> {
     }
 }
 
-pub fn is_whitespace(c: Option<char>) -> bool {
-    match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
-        ' ' | '\n' | '\t' | '\r' => true,
-        _ => false,
-    }
+// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
+// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
+pub fn is_pattern_whitespace(c: Option<char>) -> bool {
+    c.map_or(false, Pattern_White_Space)
 }
 
 fn in_range(c: Option<char>, lo: char, hi: char) -> bool {