about summary refs log tree commit diff
path: root/compiler/rustc_lexer/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_lexer/src/lib.rs')
-rw-r--r-- compiler/rustc_lexer/src/lib.rs | 43
1 files changed, 28 insertions, 15 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index d10b1920343..c29ab569f47 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -331,24 +331,37 @@ pub fn is_whitespace(c: char) -> bool {
 
     matches!(
         c,
-        // Usual ASCII suspects
-        '\u{0009}'   // \t
-        | '\u{000A}' // \n
+        // End-of-line characters
+        | '\u{000A}' // line feed (\n)
         | '\u{000B}' // vertical tab
         | '\u{000C}' // form feed
-        | '\u{000D}' // \r
-        | '\u{0020}' // space
-
-        // NEXT LINE from latin1
-        | '\u{0085}'
+        | '\u{000D}' // carriage return (\r)
+        | '\u{0085}' // next line (from latin1)
+        | '\u{2028}' // LINE SEPARATOR
+        | '\u{2029}' // PARAGRAPH SEPARATOR
 
-        // Bidi markers
+        // `Default_Ignorable_Code_Point` characters
         | '\u{200E}' // LEFT-TO-RIGHT MARK
         | '\u{200F}' // RIGHT-TO-LEFT MARK
 
-        // Dedicated whitespace characters from Unicode
-        | '\u{2028}' // LINE SEPARATOR
-        | '\u{2029}' // PARAGRAPH SEPARATOR
+        // Horizontal space characters
+        | '\u{0009}'   // tab (\t)
+        | '\u{0020}' // space
+    )
+}
+
+/// True if `c` is considered horizontal whitespace according to the Rust language definition.
+pub fn is_horizontal_whitespace(c: char) -> bool {
+    // This is the horizontal subset of Pattern_White_Space: only tab and space.
+    //
+    // Note that this set is stable (i.e., it doesn't change with different
+    // Unicode versions), so it's ok to just hard-code the values.
+
+    matches!(
+        c,
+        // Horizontal space characters
+        '\u{0009}'   // tab (\t)
+        | '\u{0020}' // space
     )
 }
 
@@ -538,7 +551,7 @@ impl Cursor<'_> {
         debug_assert!(length_opening >= 3);
 
         // whitespace between the opening and the infostring.
-        self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
+        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
 
         // copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
         // `---Cargo.toml` as a valid opener
@@ -547,7 +560,7 @@ impl Cursor<'_> {
             self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
         }
 
-        self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
+        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
         let invalid_infostring = self.first() != '\n';
 
         let mut found = false;
@@ -588,7 +601,7 @@ impl Cursor<'_> {
                 // on a standalone line. Might be wrong.
                 while let Some(closing) = rest.find("---") {
                     let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
-                    if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
+                    if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
                         // candidate found
                         potential_closing = Some(closing);
                         break;