Diffstat (limited to 'compiler')
-rw-r--r--  compiler/rustc_ast_passes/src/feature_gate.rs |   1
-rw-r--r--  compiler/rustc_feature/src/unstable.rs        |   2
-rw-r--r--  compiler/rustc_lexer/src/cursor.rs            |  14
-rw-r--r--  compiler/rustc_lexer/src/lib.rs               | 155
-rw-r--r--  compiler/rustc_lexer/src/tests.rs             |   2
-rw-r--r--  compiler/rustc_parse/messages.ftl             |  13
-rw-r--r--  compiler/rustc_parse/src/errors.rs            |  55
-rw-r--r--  compiler/rustc_parse/src/lexer/mod.rs         | 106
-rw-r--r--  compiler/rustc_span/src/symbol.rs             |   1
9 files changed, 333 insertions, 16 deletions
diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs
index e312f15f05b..915613a3913 100644
--- a/compiler/rustc_ast_passes/src/feature_gate.rs
+++ b/compiler/rustc_ast_passes/src/feature_gate.rs
@@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
     gate_all!(contracts_internals, "contract internal machinery is for internal use only");
     gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
     gate_all!(super_let, "`super let` is experimental");
+    gate_all!(frontmatter, "frontmatters are experimental");
 
     if !visitor.features.never_patterns() {
         if let Some(spans) = spans.get(&sym::never_patterns) {
diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs
index 75e09cacb1f..f6fddfb4d67 100644
--- a/compiler/rustc_feature/src/unstable.rs
+++ b/compiler/rustc_feature/src/unstable.rs
@@ -506,6 +506,8 @@ declare_features! (
     (incomplete, fn_delegation, "1.76.0", Some(118212)),
     /// Allows impls for the Freeze trait.
     (internal, freeze_impls, "1.78.0", Some(121675)),
+    /// Frontmatter `---` blocks for use by external tools.
+    (unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
     /// Allows defining gen blocks and `gen fn`.
     (unstable, gen_blocks, "1.75.0", Some(117078)),
     /// Infer generic args for both consts and types.
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index e0e3bd0e30b..526693d3de1 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -1,5 +1,10 @@
 use std::str::Chars;
 
+pub enum FrontmatterAllowed {
+    Yes,
+    No,
+}
+
 /// Peekable iterator over a char sequence.
 ///
 /// Next characters can be peeked via `first` method,
@@ -8,6 +13,7 @@ pub struct Cursor<'a> {
     len_remaining: usize,
     /// Iterator over chars. Slightly faster than a &str.
     chars: Chars<'a>,
+    pub(crate) frontmatter_allowed: FrontmatterAllowed,
     #[cfg(debug_assertions)]
     prev: char,
 }
@@ -15,10 +21,11 @@ pub struct Cursor<'a> {
 pub(crate) const EOF_CHAR: char = '\0';
 
 impl<'a> Cursor<'a> {
-    pub fn new(input: &'a str) -> Cursor<'a> {
+    pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
         Cursor {
             len_remaining: input.len(),
             chars: input.chars(),
+            frontmatter_allowed,
             #[cfg(debug_assertions)]
             prev: EOF_CHAR,
         }
@@ -95,6 +102,11 @@ impl<'a> Cursor<'a> {
         Some(c)
     }
 
+    /// Moves to a substring by a number of bytes.
+    pub(crate) fn bump_bytes(&mut self, n: usize) {
+        self.chars = self.as_str()[n..].chars();
+    }
+
     /// Eats symbols while predicate returns true or until the end of file is reached.
     pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
         // It was tried making optimized version of this for eg. line comments, but
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index f9c71b2fa65..2374f388250 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -35,8 +35,8 @@ pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
 
 use self::LiteralKind::*;
 use self::TokenKind::*;
-pub use crate::cursor::Cursor;
 use crate::cursor::EOF_CHAR;
+pub use crate::cursor::{Cursor, FrontmatterAllowed};
 
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
@@ -57,17 +57,27 @@ impl Token {
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum TokenKind {
     /// A line comment, e.g. `// comment`.
-    LineComment { doc_style: Option<DocStyle> },
+    LineComment {
+        doc_style: Option<DocStyle>,
+    },
 
     /// A block comment, e.g. `/* block comment */`.
     ///
     /// Block comments can be recursive, so a sequence like `/* /* */`
     /// will not be considered terminated and will result in a parsing error.
-    BlockComment { doc_style: Option<DocStyle>, terminated: bool },
+    BlockComment {
+        doc_style: Option<DocStyle>,
+        terminated: bool,
+    },
 
     /// Any whitespace character sequence.
     Whitespace,
 
+    Frontmatter {
+        has_invalid_preceding_whitespace: bool,
+        invalid_infostring: bool,
+    },
+
     /// An identifier or keyword, e.g. `ident` or `continue`.
     Ident,
 
@@ -109,10 +119,15 @@ pub enum TokenKind {
     /// this type will need to check for and reject that case.
     ///
     /// See [LiteralKind] for more details.
-    Literal { kind: LiteralKind, suffix_start: u32 },
+    Literal {
+        kind: LiteralKind,
+        suffix_start: u32,
+    },
 
     /// A lifetime, e.g. `'a`.
-    Lifetime { starts_with_number: bool },
+    Lifetime {
+        starts_with_number: bool,
+    },
 
     /// `;`
     Semi,
@@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
 #[inline]
 pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
     debug_assert!(!input.is_empty());
-    let mut cursor = Cursor::new(input);
+    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
     // Move past the leading `r` or `br`.
     for _ in 0..prefix_len {
         cursor.bump().unwrap();
@@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
-    let mut cursor = Cursor::new(input);
+    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
     std::iter::from_fn(move || {
         let token = cursor.advance_token();
         if token.kind != TokenKind::Eof { Some(token) } else { None }
@@ -361,7 +376,34 @@ impl Cursor<'_> {
             Some(c) => c,
            None => return Token::new(TokenKind::Eof, 0),
         };
+
         let token_kind = match first_char {
+            c if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
+                && is_whitespace(c) =>
+            {
+                let mut last = first_char;
+                while is_whitespace(self.first()) {
+                    let Some(c) = self.bump() else {
+                        break;
+                    };
+                    last = c;
+                }
+                // invalid frontmatter opening as whitespace preceding it isn't newline.
+                // combine the whitespace and the frontmatter to a single token as we shall
+                // error later.
+                if last != '\n' && self.as_str().starts_with("---") {
+                    self.bump();
+                    self.frontmatter(true)
+                } else {
+                    Whitespace
+                }
+            }
+            '-' if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
+                && self.as_str().starts_with("--") =>
+            {
+                // happy path
+                self.frontmatter(false)
+            }
             // Slash, comment or block comment.
             '/' => match self.first() {
                 '/' => self.line_comment(),
@@ -464,11 +506,110 @@ impl Cursor<'_> {
             c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
             _ => Unknown,
         };
+        if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
+            && !matches!(token_kind, Whitespace)
+        {
+            // stop allowing frontmatters after first non-whitespace token
+            self.frontmatter_allowed = FrontmatterAllowed::No;
+        }
         let res = Token::new(token_kind, self.pos_within_token());
         self.reset_pos_within_token();
         res
     }
 
+    /// Given that one `-` was eaten, eat the rest of the frontmatter.
+    fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
+        debug_assert_eq!('-', self.prev());
+
+        let pos = self.pos_within_token();
+        self.eat_while(|c| c == '-');
+
+        // one `-` is eaten by the caller.
+        let length_opening = self.pos_within_token() - pos + 1;
+
+        // must be ensured by the caller
+        debug_assert!(length_opening >= 3);
+
+        // whitespace between the opening and the infostring.
+        self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
+
+        // copied from `eat_identifier`, but allows `.` in infostring to allow something like
+        // `---Cargo.toml` as a valid opener
+        if is_id_start(self.first()) {
+            self.bump();
+            self.eat_while(|c| is_id_continue(c) || c == '.');
+        }
+
+        self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
+        let invalid_infostring = self.first() != '\n';
+
+        let mut s = self.as_str();
+        let mut found = false;
+        while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) {
+            let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1);
+            if s[preceding_chars_start..closing].chars().all(is_whitespace) {
+                // candidate found
+                self.bump_bytes(closing);
+                // in case like
+                // ---cargo
+                // --- blahblah
+                // or
+                // ---cargo
+                // ----
+                // combine those stuff into this frontmatter token such that it gets detected later.
+                self.eat_until(b'\n');
+                found = true;
+                break;
+            } else {
+                s = &s[closing + length_opening as usize..];
+            }
+        }
+
+        if !found {
+            // recovery strategy: a closing statement might have precending whitespace/newline
+            // but not have enough dashes to properly close. In this case, we eat until there,
+            // and report a mismatch in the parser.
+            let mut rest = self.as_str();
+            // We can look for a shorter closing (starting with four dashes but closing with three)
+            // and other indications that Rust has started and the infostring has ended.
+            let mut potential_closing = rest
+                .find("\n---")
+                // n.b. only in the case where there are dashes, we move the index to the line where
+                // the dashes start as we eat to include that line. For other cases those are Rust code
+                // and not included in the frontmatter.
+                .map(|x| x + 1)
+                .or_else(|| rest.find("\nuse "))
+                .or_else(|| rest.find("\n//!"))
+                .or_else(|| rest.find("\n#!["));
+
+            if potential_closing.is_none() {
+                // a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
+                // on a standalone line. Might be wrong.
+                while let Some(closing) = rest.find("---") {
+                    let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
+                    if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
+                        // candidate found
+                        potential_closing = Some(closing);
+                        break;
+                    } else {
+                        rest = &rest[closing + 3..];
+                    }
+                }
+            }
+
+            if let Some(potential_closing) = potential_closing {
+                // bump to the potential closing, and eat everything on that line.
+                self.bump_bytes(potential_closing);
+                self.eat_until(b'\n');
+            } else {
+                // eat everything. this will get reported as an unclosed frontmatter.
+                self.eat_while(|_| true);
+            }
+        }
+
+        Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
+    }
+
     fn line_comment(&mut self) -> TokenKind {
         debug_assert!(self.prev() == '/' && self.first() == '/');
         self.bump();
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index 8203ae70b07..fc8d9b9d57b 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -4,7 +4,7 @@ use super::*;
 
 fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
     let s = &format!("r{}", s);
-    let mut cursor = Cursor::new(s);
+    let mut cursor = Cursor::new(s, FrontmatterAllowed::No);
     cursor.bump();
     let res = cursor.raw_double_quoted_string(0);
     assert_eq!(res, expected);
diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl
index ac4f7ed64e2..3e953e6c855 100644
--- a/compiler/rustc_parse/messages.ftl
+++ b/compiler/rustc_parse/messages.ftl
@@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
 parse_found_expr_would_be_stmt = expected expression, found `{$token}`
     .label = expected expression
 
+parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
+parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
+    .note = frontmatter close should not be preceded by whitespace
+parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
+    .note = frontmatter infostrings must be a single identifier immediately following the opening
+parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
+    .note = frontmatter opening should not be preceded by whitespace
+parse_frontmatter_length_mismatch = frontmatter close does not match the opening
+    .label_opening = the opening here has {$len_opening} dashes...
+    .label_close = ...while the close has {$len_close} dashes
+parse_frontmatter_unclosed = unclosed frontmatter
+    .note = frontmatter opening here was not closed
+
 parse_function_body_equals_expr = function body cannot be `= expression;`
     .suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`
 
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs
index 6a6fb0eb9b5..9e5c81d44a5 100644
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@@ -736,6 +736,61 @@ pub(crate) struct FoundExprWouldBeStmt {
 }
 
 #[derive(Diagnostic)]
+#[diag(parse_frontmatter_extra_characters_after_close)]
+pub(crate) struct FrontmatterExtraCharactersAfterClose {
+    #[primary_span]
+    pub span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_infostring)]
+#[note]
+pub(crate) struct FrontmatterInvalidInfostring {
+    #[primary_span]
+    pub span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
+pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_unclosed)]
+pub(crate) struct FrontmatterUnclosed {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
+pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_length_mismatch)]
+pub(crate) struct FrontmatterLengthMismatch {
+    #[primary_span]
+    pub span: Span,
+    #[label(parse_label_opening)]
+    pub opening: Span,
+    #[label(parse_label_close)]
+    pub close: Span,
+    pub len_opening: usize,
+    pub len_close: usize,
+}
+
+#[derive(Diagnostic)]
 #[diag(parse_leading_plus_not_supported)]
 pub(crate) struct LeadingPlusNotSupported {
     #[primary_span]
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e8a5cae54cf..78c5742414b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
-use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
+use rustc_lexer::{
+    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
+};
 use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
@@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{
     TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
 use rustc_session::parse::ParseSess;
-use rustc_span::{BytePos, Pos, Span, Symbol};
+use rustc_span::{BytePos, Pos, Span, Symbol, sym};
 use tracing::debug;
 
 use crate::errors;
@@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
         start_pos = start_pos + BytePos::from_usize(shebang_len);
     }
 
-    let cursor = Cursor::new(src);
+    let cursor = Cursor::new(src, FrontmatterAllowed::Yes);
     let mut lexer = Lexer {
         psess,
         start_pos,
@@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     let content = self.str_from_to(content_start, content_end);
                     self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
+                rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
+                    self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
+                    preceded_by_whitespace = true;
+                    continue;
+                }
                 rustc_lexer::TokenKind::Whitespace => {
                     preceded_by_whitespace = true;
                     continue;
@@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 // was consumed.
                 let lit_start = start + BytePos(prefix_len);
                 self.pos = lit_start;
-                self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+                self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
                 self.report_unknown_prefix(start);
                 let prefix_span = self.mk_sp(start, lit_start);
                 return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
@@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 // Reset the state so we just lex the `'r`.
                 let lt_start = start + BytePos(2);
                 self.pos = lt_start;
-                self.cursor = Cursor::new(&str_before[2 as usize..]);
+                self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
 
                 let lifetime_name = self.str_from(start);
                 let ident = Symbol::intern(lifetime_name);
@@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
+    fn validate_frontmatter(
+        &self,
+        start: BytePos,
+        has_invalid_preceding_whitespace: bool,
+        invalid_infostring: bool,
+    ) {
+        let s = self.str_from(start);
+        let real_start = s.find("---").unwrap();
+        let frontmatter_opening_pos = BytePos(real_start as u32) + start;
+        let s_new = &s[real_start..];
+        let within = s_new.trim_start_matches('-');
+        let len_opening = s_new.len() - within.len();
+
+        let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
+        if has_invalid_preceding_whitespace {
+            let line_start =
+                BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
+            let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
+            let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
+            self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        if invalid_infostring {
+            let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len());
+            let span = self.mk_sp(
+                frontmatter_opening_end_pos,
+                frontmatter_opening_pos + BytePos(line_end as u32),
+            );
+            self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
+        }
+
+        let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
+        let last_line = &within[last_line_start..];
+        let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
+        let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
+
+        let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
+        self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);
+
+        if !last_line_trimmed.starts_with("---") {
+            let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            self.dcx().emit_err(errors::FrontmatterUnclosed {
+                span: frontmatter_span,
+                note_span: label_span,
+            });
+            return;
+        }
+
+        if last_line_trimmed.len() != last_line.len() {
+            let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
+            let span = self.mk_sp(last_line_start_pos, line_end);
+            let whitespace_end =
+                last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32);
+            let label_span = self.mk_sp(last_line_start_pos, whitespace_end);
+            self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        let rest = last_line_trimmed.trim_start_matches('-');
+        let len_close = last_line_trimmed.len() - rest.len();
+        if len_close != len_opening {
+            let span = self.mk_sp(frontmatter_opening_pos, self.pos);
+            let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32);
+            let close = self.mk_sp(last_line_start_pos, last_line_close_pos);
+            self.dcx().emit_err(errors::FrontmatterLengthMismatch {
+                span,
+                opening,
+                close,
+                len_opening,
+                len_close,
+            });
+        }
+
+        if !rest.trim_matches(is_whitespace).is_empty() {
+            let span = self.mk_sp(last_line_start_pos, self.pos);
+            self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
+        }
+    }
+
     fn cook_doc_comment(
         &self,
         content_start: BytePos,
@@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         let space_pos = start + BytePos(1);
         let space_span = self.mk_sp(space_pos, space_pos);
 
-        let mut cursor = Cursor::new(str_before);
+        let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
 
         let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
             Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
@@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 // For backwards compatibility, roll back to after just the first `#`
                 // and return the `Pound` token.
                 self.pos = start + BytePos(1);
-                self.cursor = Cursor::new(&str_before[1..]);
+                self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
                 token::Pound
             }
         }
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index ba3e6d7ca82..d7dbdf04a8c 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1047,6 +1047,7 @@ symbols! {
         from_u16,
         from_usize,
         from_yeet,
+        frontmatter,
         fs_create_dir,
         fsub_algebraic,
         fsub_fast,
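
Editor's note (not part of the commit): the sketch below illustrates the kind of input the new Frontmatter token is designed to cover, assuming a nightly compiler with the frontmatter feature enabled. The file contents and the `cargo` infostring are hypothetical examples; how external tools interpret the block is outside this diff. Per the lexer change, everything from the opening `---cargo` line through the closing `---` line is consumed as one `TokenKind::Frontmatter { .. }` token, and `validate_frontmatter` in rustc_parse then checks the opening/closing dash counts, the infostring, and the surrounding whitespace, emitting the new `parse_frontmatter_*` diagnostics on mismatch.

    ---cargo
    [dependencies]
    clap = "4"
    ---

    // Ordinary Rust resumes after the closing dashes; the block above was
    // consumed as a single Frontmatter token and gated behind the feature.
    #![feature(frontmatter)]

    fn main() {
        println!("hello from a frontmatter-bearing file");
    }

As far as this diff goes, rustc only tokenizes and validates the block; it never interprets the TOML-looking body, which is left to external tools.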
