diff options
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/errors.rs | 55 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 106 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 53 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mut_visit/tests.rs | 65 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/tests.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/validate_attr.rs | 73 |
6 files changed, 259 insertions, 99 deletions
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 6a6fb0eb9b5..9e5c81d44a5 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -736,6 +736,61 @@ pub(crate) struct FoundExprWouldBeStmt { } #[derive(Diagnostic)] +#[diag(parse_frontmatter_extra_characters_after_close)] +pub(crate) struct FrontmatterExtraCharactersAfterClose { + #[primary_span] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_frontmatter_invalid_infostring)] +#[note] +pub(crate) struct FrontmatterInvalidInfostring { + #[primary_span] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)] +pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace { + #[primary_span] + pub span: Span, + #[note] + pub note_span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_frontmatter_unclosed)] +pub(crate) struct FrontmatterUnclosed { + #[primary_span] + pub span: Span, + #[note] + pub note_span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_frontmatter_invalid_close_preceding_whitespace)] +pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace { + #[primary_span] + pub span: Span, + #[note] + pub note_span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_frontmatter_length_mismatch)] +pub(crate) struct FrontmatterLengthMismatch { + #[primary_span] + pub span: Span, + #[label(parse_label_opening)] + pub opening: Span, + #[label(parse_label_close)] + pub close: Span, + pub len_opening: usize, + pub len_close: usize, +} + +#[derive(Diagnostic)] #[diag(parse_leading_plus_not_supported)] pub(crate) struct LeadingPlusNotSupported { #[primary_span] diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index e8a5cae54cf..78c5742414b 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; +use rustc_lexer::{ + Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace, +}; use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ @@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{ TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; use rustc_session::parse::ParseSess; -use rustc_span::{BytePos, Pos, Span, Symbol}; +use rustc_span::{BytePos, Pos, Span, Symbol, sym}; use tracing::debug; use crate::errors; @@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>( start_pos = start_pos + BytePos::from_usize(shebang_len); } - let cursor = Cursor::new(src); + let cursor = Cursor::new(src, FrontmatterAllowed::Yes); let mut lexer = Lexer { psess, start_pos, @@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let content = self.str_from_to(content_start, content_end); self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } + rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => { + self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring); + preceded_by_whitespace = true; + continue; + } rustc_lexer::TokenKind::Whitespace => { preceded_by_whitespace = true; continue; @@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // was consumed. let lit_start = start + BytePos(prefix_len); self.pos = lit_start; - self.cursor = Cursor::new(&str_before[prefix_len as usize..]); + self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No); self.report_unknown_prefix(start); let prefix_span = self.mk_sp(start, lit_start); return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace); @@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Reset the state so we just lex the `'r`. let lt_start = start + BytePos(2); self.pos = lt_start; - self.cursor = Cursor::new(&str_before[2 as usize..]); + self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No); let lifetime_name = self.str_from(start); let ident = Symbol::intern(lifetime_name); @@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } } + fn validate_frontmatter( + &self, + start: BytePos, + has_invalid_preceding_whitespace: bool, + invalid_infostring: bool, + ) { + let s = self.str_from(start); + let real_start = s.find("---").unwrap(); + let frontmatter_opening_pos = BytePos(real_start as u32) + start; + let s_new = &s[real_start..]; + let within = s_new.trim_start_matches('-'); + let len_opening = s_new.len() - within.len(); + + let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32); + if has_invalid_preceding_whitespace { + let line_start = + BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start; + let span = self.mk_sp(line_start, frontmatter_opening_end_pos); + let label_span = self.mk_sp(line_start, frontmatter_opening_pos); + self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace { + span, + note_span: label_span, + }); + } + + if invalid_infostring { + let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len()); + let span = self.mk_sp( + frontmatter_opening_end_pos, + frontmatter_opening_pos + BytePos(line_end as u32), + ); + self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span }); + } + + let last_line_start = within.rfind('\n').map_or(0, |i| i + 1); + let last_line = &within[last_line_start..]; + let last_line_trimmed = last_line.trim_start_matches(is_whitespace); + let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32); + + let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos); + self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span); + + if !last_line_trimmed.starts_with("---") { + let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos); + self.dcx().emit_err(errors::FrontmatterUnclosed { + span: frontmatter_span, + note_span: label_span, + }); + return; + } + + if last_line_trimmed.len() != last_line.len() { + let line_end = last_line_start_pos + BytePos(last_line.len() as u32); + let span = self.mk_sp(last_line_start_pos, line_end); + let whitespace_end = + last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32); + let label_span = self.mk_sp(last_line_start_pos, whitespace_end); + self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace { + span, + note_span: label_span, + }); + } + + let rest = last_line_trimmed.trim_start_matches('-'); + let len_close = last_line_trimmed.len() - rest.len(); + if len_close != len_opening { + let span = self.mk_sp(frontmatter_opening_pos, self.pos); + let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos); + let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32); + let close = self.mk_sp(last_line_start_pos, last_line_close_pos); + self.dcx().emit_err(errors::FrontmatterLengthMismatch { + span, + opening, + close, + len_opening, + len_close, + }); + } + + if !rest.trim_matches(is_whitespace).is_empty() { + let span = self.mk_sp(last_line_start_pos, self.pos); + self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span }); + } + } + fn cook_doc_comment( &self, content_start: BytePos, @@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let space_pos = start + BytePos(1); let space_span = self.mk_sp(space_pos, space_pos); - let mut cursor = Cursor::new(str_before); + let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No); let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() { Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => { @@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // For backwards compatibility, roll back to after just the first `#` // and return the `Pound` token. self.pos = start + BytePos(1); - self.cursor = Cursor::new(&str_before[1..]); + self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No); token::Pound } } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 5500fba58a5..968376678f3 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -56,19 +56,64 @@ mod tests; mod tokenstream { mod tests; } -#[cfg(test)] -mod mut_visit { - mod tests; -} bitflags::bitflags! { + /// Restrictions applied while parsing. + /// + /// The parser maintains a bitset of restrictions it will honor while + /// parsing. This is essentially used as a way of tracking state of what + /// is being parsed and to change behavior based on that. #[derive(Clone, Copy, Debug)] struct Restrictions: u8 { + /// Restricts expressions for use in statement position. + /// + /// When expressions are used in various places, like statements or + /// match arms, this is used to stop parsing once certain tokens are + /// reached. + /// + /// For example, `if true {} & 1` with `STMT_EXPR` in effect is parsed + /// as two separate expression statements (`if` and a reference to 1). + /// Otherwise it is parsed as a bitwise AND where `if` is on the left + /// and 1 is on the right. const STMT_EXPR = 1 << 0; + /// Do not allow struct literals. + /// + /// There are several places in the grammar where we don't want to + /// allow struct literals because they can require lookahead, or + /// otherwise could be ambiguous or cause confusion. For example, + /// `if Foo {} {}` isn't clear if it is `Foo{}` struct literal, or + /// just `Foo` is the condition, followed by a consequent block, + /// followed by an empty block. + /// + /// See [RFC 92](https://rust-lang.github.io/rfcs/0092-struct-grammar.html). const NO_STRUCT_LITERAL = 1 << 1; + /// Used to provide better error messages for const generic arguments. + /// + /// An un-braced const generic argument is limited to a very small + /// subset of expressions. This is used to detect the situation where + /// an expression outside of that subset is used, and to suggest to + /// wrap the expression in braces. const CONST_EXPR = 1 << 2; + /// Allows `let` expressions. + /// + /// `let pattern = scrutinee` is parsed as an expression, but it is + /// only allowed in let chains (`if` and `while` conditions). + /// Otherwise it is not an expression (note that `let` in statement + /// positions is treated as a `StmtKind::Let` statement, which has a + /// slightly different grammar). const ALLOW_LET = 1 << 3; + /// Used to detect a missing `=>` in a match guard. + /// + /// This is used for error handling in a match guard to give a better + /// error message if the `=>` is missing. It is set when parsing the + /// guard expression. const IN_IF_GUARD = 1 << 4; + /// Used to detect the incorrect use of expressions in patterns. + /// + /// This is used for error handling while parsing a pattern. During + /// error recovery, this will be set to try to parse the pattern as an + /// expression, but halts parsing the expression when reaching certain + /// tokens like `=`. const IS_PAT = 1 << 5; } } diff --git a/compiler/rustc_parse/src/parser/mut_visit/tests.rs b/compiler/rustc_parse/src/parser/mut_visit/tests.rs deleted file mode 100644 index 46c678c3902..00000000000 --- a/compiler/rustc_parse/src/parser/mut_visit/tests.rs +++ /dev/null @@ -1,65 +0,0 @@ -use rustc_ast as ast; -use rustc_ast::mut_visit::MutVisitor; -use rustc_ast_pretty::pprust; -use rustc_span::{Ident, create_default_session_globals_then}; - -use crate::parser::tests::{matches_codepattern, string_to_crate}; - -// This version doesn't care about getting comments or doc-strings in. -fn print_crate_items(krate: &ast::Crate) -> String { - krate.items.iter().map(|i| pprust::item_to_string(i)).collect::<Vec<_>>().join(" ") -} - -// Change every identifier to "zz". -struct ToZzIdentMutVisitor; - -impl MutVisitor for ToZzIdentMutVisitor { - const VISIT_TOKENS: bool = true; - - fn visit_ident(&mut self, ident: &mut Ident) { - *ident = Ident::from_str("zz"); - } -} - -macro_rules! assert_matches_codepattern { - ($a:expr , $b:expr) => {{ - let a_val = $a; - let b_val = $b; - if !matches_codepattern(&a_val, &b_val) { - panic!("expected args satisfying `matches_codepattern`, got {} and {}", a_val, b_val); - } - }}; -} - -// Make sure idents get transformed everywhere. -#[test] -fn ident_transformation() { - create_default_session_globals_then(|| { - let mut zz_visitor = ToZzIdentMutVisitor; - let mut krate = - string_to_crate("#[a] mod b {fn c (d : e, f : g) {h!(i,j,k);l;m}}".to_string()); - zz_visitor.visit_crate(&mut krate); - assert_matches_codepattern!( - print_crate_items(&krate), - "#[zz]mod zz{fn zz(zz:zz,zz:zz){zz!(zz,zz,zz);zz;zz}}".to_string() - ); - }) -} - -// Make sure idents get transformed even inside macro defs. -#[test] -fn ident_transformation_in_defs() { - create_default_session_globals_then(|| { - let mut zz_visitor = ToZzIdentMutVisitor; - let mut krate = string_to_crate( - "macro_rules! a {(b $c:expr $(d $e:token)f+ => \ - (g $(d $d $e)+))} " - .to_string(), - ); - zz_visitor.visit_crate(&mut krate); - assert_matches_codepattern!( - print_crate_items(&krate), - "macro_rules! zz{(zz$zz:zz$(zz $zz:zz)zz+=>(zz$(zz$zz$zz)+))}".to_string() - ); - }) -} diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index 8285070839a..2a44c90abc1 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -95,12 +95,6 @@ pub(crate) fn string_to_stream(source_str: String) -> TokenStream { )) } -/// Parses a string, returns a crate. -pub(crate) fn string_to_crate(source_str: String) -> ast::Crate { - let psess = psess(); - with_error_checking_parse(source_str, &psess, |p| p.parse_crate_mod()) -} - /// Does the given string match the pattern? whitespace in the first string /// may be deleted or replaced with other whitespace to match the pattern. /// This function is relatively Unicode-ignorant; fortunately, the careful design diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index 6a1c2af48ed..378cbb84637 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -3,7 +3,8 @@ use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; use rustc_ast::{ - self as ast, AttrArgs, Attribute, DelimArgs, MetaItem, MetaItemInner, MetaItemKind, Safety, + self as ast, AttrArgs, Attribute, DelimArgs, MetaItem, MetaItemInner, MetaItemKind, NodeId, + Safety, }; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeSafety, AttributeTemplate, BUILTIN_ATTRIBUTE_MAP, BuiltinAttribute}; @@ -15,21 +16,19 @@ use rustc_span::{Span, Symbol, sym}; use crate::{errors, parse_in}; -pub fn check_attr(psess: &ParseSess, attr: &Attribute) { +pub fn check_attr(psess: &ParseSess, attr: &Attribute, id: NodeId) { if attr.is_doc_comment() || attr.has_name(sym::cfg_trace) || attr.has_name(sym::cfg_attr_trace) { return; } - let attr_info = attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name)); - let attr_item = attr.get_normal_item(); + let builtin_attr_info = attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name)); - // All non-builtin attributes are considered safe - let safety = attr_info.map(|x| x.safety).unwrap_or(AttributeSafety::Normal); - check_attribute_safety(psess, safety, attr); + let builtin_attr_safety = builtin_attr_info.map(|x| x.safety); + check_attribute_safety(psess, builtin_attr_safety, attr, id); // Check input tokens for built-in and key-value attributes. - match attr_info { + match builtin_attr_info { // `rustc_dummy` doesn't have any restrictions specific to built-in attributes. Some(BuiltinAttribute { name, template, .. }) if *name != sym::rustc_dummy => { match parse_meta(psess, attr) { @@ -43,6 +42,7 @@ pub fn check_attr(psess: &ParseSess, attr: &Attribute) { } } _ => { + let attr_item = attr.get_normal_item(); if let AttrArgs::Eq { .. } = attr_item.args { // All key-value attributes are restricted to meta-item syntax. match parse_meta(psess, attr) { @@ -154,11 +154,23 @@ fn is_attr_template_compatible(template: &AttributeTemplate, meta: &ast::MetaIte } } -pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr: &Attribute) { +pub fn check_attribute_safety( + psess: &ParseSess, + builtin_attr_safety: Option<AttributeSafety>, + attr: &Attribute, + id: NodeId, +) { let attr_item = attr.get_normal_item(); + match (builtin_attr_safety, attr_item.unsafety) { + // - Unsafe builtin attribute + // - User wrote `#[unsafe(..)]`, which is permitted on any edition + (Some(AttributeSafety::Unsafe { .. }), Safety::Unsafe(..)) => { + // OK + } - if let AttributeSafety::Unsafe { unsafe_since } = safety { - if let ast::Safety::Default = attr_item.unsafety { + // - Unsafe builtin attribute + // - User did not write `#[unsafe(..)]` + (Some(AttributeSafety::Unsafe { unsafe_since }), Safety::Default) => { let path_span = attr_item.path.span; // If the `attr_item`'s span is not from a macro, then just suggest @@ -185,7 +197,7 @@ pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr: psess.buffer_lint( UNSAFE_ATTR_OUTSIDE_UNSAFE, path_span, - ast::CRATE_NODE_ID, + id, BuiltinLintDiag::UnsafeAttrOutsideUnsafe { attribute_name_span: path_span, sugg_spans: (diag_span.shrink_to_lo(), diag_span.shrink_to_hi()), @@ -193,11 +205,38 @@ pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr: ); } } - } else if let Safety::Unsafe(unsafe_span) = attr_item.unsafety { - psess.dcx().emit_err(errors::InvalidAttrUnsafe { - span: unsafe_span, - name: attr_item.path.clone(), - }); + + // - Normal builtin attribute, or any non-builtin attribute + // - All non-builtin attributes are currently considered safe; writing `#[unsafe(..)]` is + // not permitted on non-builtin attributes or normal builtin attributes + (Some(AttributeSafety::Normal) | None, Safety::Unsafe(unsafe_span)) => { + psess.dcx().emit_err(errors::InvalidAttrUnsafe { + span: unsafe_span, + name: attr_item.path.clone(), + }); + } + + // - Normal builtin attribute + // - No explicit `#[unsafe(..)]` written. + (Some(AttributeSafety::Normal), Safety::Default) => { + // OK + } + + // - Non-builtin attribute + // - No explicit `#[unsafe(..)]` written. + (None, Safety::Default) => { + // OK + } + + ( + Some(AttributeSafety::Unsafe { .. } | AttributeSafety::Normal) | None, + Safety::Safe(..), + ) => { + psess.dcx().span_delayed_bug( + attr_item.span(), + "`check_attribute_safety` does not expect `Safety::Safe` on attributes", + ); + } } } |
