about summary refs log tree commit diff
path: root/compiler/rustc_parse/src
diff options
context:
space:
mode:
authorEd Page <eopage@gmail.com>2025-08-28 15:15:41 -0500
committerEd Page <eopage@gmail.com>2025-09-01 20:51:39 -0500
commit6f0da976c5f422ff5b1124d409e7d7e063fc4225 (patch)
treefd78a411df7eb377af1978a1e8aa41c99009cbf7 /compiler/rustc_parse/src
parent428e413414e0719a7758676ac49e07b85eb86dec (diff)
downloadrust-6f0da976c5f422ff5b1124d409e7d7e063fc4225.tar.gz
rust-6f0da976c5f422ff5b1124d409e7d7e063fc4225.zip
fix(lexer): Only allow horizontal whitespace in frontmatter
In writing up the reference for frontmatter, I realized that we probably
shouldn't be accepting Unicode Line Ending characters between the code
fence and infostring or trailing after the infostring or a code fence.

In digging into the unicode specification we use for Whitespace, it
divides it up into categories, so I'm deferring to what it says for
horizontal whitespace for what should be used within a line.

Note, I am leaving out support for Unicde Default Ignorable characters.
I figure that can be discussed outside of this change within the
reference and tracking issue.
Diffstat (limited to 'compiler/rustc_parse/src')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs6
1 files changed, 3 insertions, 3 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 9792240a548..e3bd6a9a327 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
 use rustc_lexer::{
-    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
+    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
 };
 use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
 use rustc_session::lint::BuiltinLintDiag;
@@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
 
         let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
         let last_line = &within[last_line_start..];
-        let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
+        let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
         let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
 
         let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
@@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             });
         }
 
-        if !rest.trim_matches(is_whitespace).is_empty() {
+        if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
             let span = self.mk_sp(last_line_start_pos, self.pos);
             self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
         }