6 files changed, 259 insertions, 99 deletions
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs
index 6a6fb0eb9b5..9e5c81d44a5 100644
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@@ -736,6 +736,61 @@ pub(crate) struct FoundExprWouldBeStmt {
 }
 
 #[derive(Diagnostic)]
+#[diag(parse_frontmatter_extra_characters_after_close)]
+pub(crate) struct FrontmatterExtraCharactersAfterClose {
+    #[primary_span]
+    pub span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_infostring)]
+#[note]
+pub(crate) struct FrontmatterInvalidInfostring {
+    #[primary_span]
+    pub span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
+pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_unclosed)]
+pub(crate) struct FrontmatterUnclosed {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
+pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
+    #[primary_span]
+    pub span: Span,
+    #[note]
+    pub note_span: Span,
+}
+
+#[derive(Diagnostic)]
+#[diag(parse_frontmatter_length_mismatch)]
+pub(crate) struct FrontmatterLengthMismatch {
+    #[primary_span]
+    pub span: Span,
+    #[label(parse_label_opening)]
+    pub opening: Span,
+    #[label(parse_label_close)]
+    pub close: Span,
+    pub len_opening: usize,
+    pub len_close: usize,
+}
+
+#[derive(Diagnostic)]
 #[diag(parse_leading_plus_not_supported)]
 pub(crate) struct LeadingPlusNotSupported {
     #[primary_span]
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e8a5cae54cf..78c5742414b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
-use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
+use rustc_lexer::{
+    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
+};
 use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
@@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{
     TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
 use rustc_session::parse::ParseSess;
-use rustc_span::{BytePos, Pos, Span, Symbol};
+use rustc_span::{BytePos, Pos, Span, Symbol, sym};
 use tracing::debug;
 
 use crate::errors;
@@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
         start_pos = start_pos + BytePos::from_usize(shebang_len);
     }
 
-    let cursor = Cursor::new(src);
+    let cursor = Cursor::new(src, FrontmatterAllowed::Yes);
     let mut lexer = Lexer {
         psess,
         start_pos,
@@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     let content = self.str_from_to(content_start, content_end);
                     self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
+                rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
+                    self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
+                    preceded_by_whitespace = true;
+                    continue;
+                }
                 rustc_lexer::TokenKind::Whitespace => {
                     preceded_by_whitespace = true;
                     continue;
@@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     // was consumed.
                     let lit_start = start + BytePos(prefix_len);
                     self.pos = lit_start;
-                    self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+                    self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
                     self.report_unknown_prefix(start);
                     let prefix_span = self.mk_sp(start, lit_start);
                     return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
@@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         // Reset the state so we just lex the `'r`.
                         let lt_start = start + BytePos(2);
                         self.pos = lt_start;
-                        self.cursor = Cursor::new(&str_before[2 as usize..]);
+                        self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
 
                         let lifetime_name = self.str_from(start);
                         let ident = Symbol::intern(lifetime_name);
@@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
+    fn validate_frontmatter(
+        &self,
+        start: BytePos,
+        has_invalid_preceding_whitespace: bool,
+        invalid_infostring: bool,
+    ) {
+        let s = self.str_from(start);
+        let real_start = s.find("---").unwrap();
+        let frontmatter_opening_pos = BytePos(real_start as u32) + start;
+        let s_new = &s[real_start..];
+        let within = s_new.trim_start_matches('-');
+        let len_opening = s_new.len() - within.len();
+
+        let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
+        if has_invalid_preceding_whitespace {
+            let line_start =
+                BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
+            let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
+            let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
+            self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        if invalid_infostring {
+            let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len());
+            let span = self.mk_sp(
+                frontmatter_opening_end_pos,
+                frontmatter_opening_pos + BytePos(line_end as u32),
+            );
+            self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
+        }
+
+        let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
+        let last_line = &within[last_line_start..];
+        let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
+        let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
+
+        let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
+        self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);
+
+        if !last_line_trimmed.starts_with("---") {
+            let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            self.dcx().emit_err(errors::FrontmatterUnclosed {
+                span: frontmatter_span,
+                note_span: label_span,
+            });
+            return;
+        }
+
+        if last_line_trimmed.len() != last_line.len() {
+            let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
+            let span = self.mk_sp(last_line_start_pos, line_end);
+            let whitespace_end =
+                last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32);
+            let label_span = self.mk_sp(last_line_start_pos, whitespace_end);
+            self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        let rest = last_line_trimmed.trim_start_matches('-');
+        let len_close = last_line_trimmed.len() - rest.len();
+        if len_close != len_opening {
+            let span = self.mk_sp(frontmatter_opening_pos, self.pos);
+            let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32);
+            let close = self.mk_sp(last_line_start_pos, last_line_close_pos);
+            self.dcx().emit_err(errors::FrontmatterLengthMismatch {
+                span,
+                opening,
+                close,
+                len_opening,
+                len_close,
+            });
+        }
+
+        if !rest.trim_matches(is_whitespace).is_empty() {
+            let span = self.mk_sp(last_line_start_pos, self.pos);
+            self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
+        }
+    }
+
     fn cook_doc_comment(
         &self,
         content_start: BytePos,
@@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         let space_pos = start + BytePos(1);
         let space_span = self.mk_sp(space_pos, space_pos);
 
-        let mut cursor = Cursor::new(str_before);
+        let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
 
         let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
             Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
@@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             // For backwards compatibility, roll back to after just the first `#`
             // and return the `Pound` token.
             self.pos = start + BytePos(1);
-            self.cursor = Cursor::new(&str_before[1..]);
+            self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
             token::Pound
         }
     }
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 5500fba58a5..968376678f3 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -56,19 +56,64 @@ mod tests;
 mod tokenstream {
     mod tests;
 }
-#[cfg(test)]
-mod mut_visit {
-    mod tests;
-}
 
 bitflags::bitflags! {
+    /// Restrictions applied while parsing.
+    ///
+    /// The parser maintains a bitset of restrictions it will honor while
+    /// parsing. This is essentially used as a way of tracking state of what
+    /// is being parsed and to change behavior based on that.
     #[derive(Clone, Copy, Debug)]
     struct Restrictions: u8 {
+        /// Restricts expressions for use in statement position.
+        ///
+        /// When expressions are used in various places, like statements or
+        /// match arms, this is used to stop parsing once certain tokens are
+        /// reached.
+        ///
+        /// For example, `if true {} & 1` with `STMT_EXPR` in effect is parsed
+        /// as two separate expression statements (`if` and a reference to 1).
+        /// Otherwise it is parsed as a bitwise AND where `if` is on the left
+        /// and 1 is on the right.
         const STMT_EXPR         = 1 << 0;
+        /// Do not allow struct literals.
+        ///
+        /// There are several places in the grammar where we don't want to
+        /// allow struct literals because they can require lookahead, or
+        /// otherwise could be ambiguous or cause confusion. For example,
+        /// `if Foo {} {}` isn't clear if it is `Foo{}` struct literal, or
+        /// just `Foo` is the condition, followed by a consequent block,
+        /// followed by an empty block.
+        ///
+        /// See [RFC 92](https://rust-lang.github.io/rfcs/0092-struct-grammar.html).
         const NO_STRUCT_LITERAL = 1 << 1;
+        /// Used to provide better error messages for const generic arguments.
+        ///
+        /// An un-braced const generic argument is limited to a very small
+        /// subset of expressions. This is used to detect the situation where
+        /// an expression outside of that subset is used, and to suggest to
+        /// wrap the expression in braces.
         const CONST_EXPR        = 1 << 2;
+        /// Allows `let` expressions.
+        ///
+        /// `let pattern = scrutinee` is parsed as an expression, but it is
+        /// only allowed in let chains (`if` and `while` conditions).
+        /// Otherwise it is not an expression (note that `let` in statement
+        /// positions is treated as a `StmtKind::Let` statement, which has a
+        /// slightly different grammar).
         const ALLOW_LET         = 1 << 3;
+        /// Used to detect a missing `=>` in a match guard.
+        ///
+        /// This is used for error handling in a match guard to give a better
+        /// error message if the `=>` is missing. It is set when parsing the
+        /// guard expression.
         const IN_IF_GUARD       = 1 << 4;
+        /// Used to detect the incorrect use of expressions in patterns.
+        ///
+        /// This is used for error handling while parsing a pattern. During
+        /// error recovery, this will be set to try to parse the pattern as an
+        /// expression, but halts parsing the expression when reaching certain
+        /// tokens like `=`.
         const IS_PAT            = 1 << 5;
     }
 }
diff --git a/compiler/rustc_parse/src/parser/mut_visit/tests.rs b/compiler/rustc_parse/src/parser/mut_visit/tests.rs
deleted file mode 100644
index 46c678c3902..00000000000
--- a/compiler/rustc_parse/src/parser/mut_visit/tests.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use rustc_ast as ast;
-use rustc_ast::mut_visit::MutVisitor;
-use rustc_ast_pretty::pprust;
-use rustc_span::{Ident, create_default_session_globals_then};
-
-use crate::parser::tests::{matches_codepattern, string_to_crate};
-
-// This version doesn't care about getting comments or doc-strings in.
-fn print_crate_items(krate: &ast::Crate) -> String {
-    krate.items.iter().map(|i| pprust::item_to_string(i)).collect::<Vec<_>>().join(" ")
-}
-
-// Change every identifier to "zz".
-struct ToZzIdentMutVisitor;
-
-impl MutVisitor for ToZzIdentMutVisitor {
-    const VISIT_TOKENS: bool = true;
-
-    fn visit_ident(&mut self, ident: &mut Ident) {
-        *ident = Ident::from_str("zz");
-    }
-}
-
-macro_rules! assert_matches_codepattern {
-    ($a:expr , $b:expr) => {{
-        let a_val = $a;
-        let b_val = $b;
-        if !matches_codepattern(&a_val, &b_val) {
-            panic!("expected args satisfying `matches_codepattern`, got {} and {}", a_val, b_val);
-        }
-    }};
-}
-
-// Make sure idents get transformed everywhere.
-#[test]
-fn ident_transformation() {
-    create_default_session_globals_then(|| {
-        let mut zz_visitor = ToZzIdentMutVisitor;
-        let mut krate =
-            string_to_crate("#[a] mod b {fn c (d : e, f : g) {h!(i,j,k);l;m}}".to_string());
-        zz_visitor.visit_crate(&mut krate);
-        assert_matches_codepattern!(
-            print_crate_items(&krate),
-            "#[zz]mod zz{fn zz(zz:zz,zz:zz){zz!(zz,zz,zz);zz;zz}}".to_string()
-        );
-    })
-}
-
-// Make sure idents get transformed even inside macro defs.
-#[test]
-fn ident_transformation_in_defs() {
-    create_default_session_globals_then(|| {
-        let mut zz_visitor = ToZzIdentMutVisitor;
-        let mut krate = string_to_crate(
-            "macro_rules! a {(b $c:expr $(d $e:token)f+ => \
-            (g $(d $d $e)+))} "
-                .to_string(),
-        );
-        zz_visitor.visit_crate(&mut krate);
-        assert_matches_codepattern!(
-            print_crate_items(&krate),
-            "macro_rules! zz{(zz$zz:zz$(zz $zz:zz)zz+=>(zz$(zz$zz$zz)+))}".to_string()
-        );
-    })
-}
diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs
index 8285070839a..2a44c90abc1 100644
--- a/compiler/rustc_parse/src/parser/tests.rs
+++ b/compiler/rustc_parse/src/parser/tests.rs
@@ -95,12 +95,6 @@ pub(crate) fn string_to_stream(source_str: String) -> TokenStream {
     ))
 }
 
-/// Parses a string, returns a crate.
-pub(crate) fn string_to_crate(source_str: String) -> ast::Crate {
-    let psess = psess();
-    with_error_checking_parse(source_str, &psess, |p| p.parse_crate_mod())
-}
-
 /// Does the given string match the pattern? whitespace in the first string
 /// may be deleted or replaced with other whitespace to match the pattern.
 /// This function is relatively Unicode-ignorant; fortunately, the careful design
diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs
index 6a1c2af48ed..378cbb84637 100644
--- a/compiler/rustc_parse/src/validate_attr.rs
+++ b/compiler/rustc_parse/src/validate_attr.rs
@@ -3,7 +3,8 @@
 use rustc_ast::token::Delimiter;
 use rustc_ast::tokenstream::DelimSpan;
 use rustc_ast::{
-    self as ast, AttrArgs, Attribute, DelimArgs, MetaItem, MetaItemInner, MetaItemKind, Safety,
+    self as ast, AttrArgs, Attribute, DelimArgs, MetaItem, MetaItemInner, MetaItemKind, NodeId,
+    Safety,
 };
 use rustc_errors::{Applicability, FatalError, PResult};
 use rustc_feature::{AttributeSafety, AttributeTemplate, BUILTIN_ATTRIBUTE_MAP, BuiltinAttribute};
@@ -15,21 +16,19 @@ use rustc_span::{Span, Symbol, sym};
 
 use crate::{errors, parse_in};
 
-pub fn check_attr(psess: &ParseSess, attr: &Attribute) {
+pub fn check_attr(psess: &ParseSess, attr: &Attribute, id: NodeId) {
     if attr.is_doc_comment() || attr.has_name(sym::cfg_trace) || attr.has_name(sym::cfg_attr_trace)
     {
         return;
     }
 
-    let attr_info = attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name));
-    let attr_item = attr.get_normal_item();
+    let builtin_attr_info = attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name));
 
-    // All non-builtin attributes are considered safe
-    let safety = attr_info.map(|x| x.safety).unwrap_or(AttributeSafety::Normal);
-    check_attribute_safety(psess, safety, attr);
+    let builtin_attr_safety = builtin_attr_info.map(|x| x.safety);
+    check_attribute_safety(psess, builtin_attr_safety, attr, id);
 
     // Check input tokens for built-in and key-value attributes.
-    match attr_info {
+    match builtin_attr_info {
         // `rustc_dummy` doesn't have any restrictions specific to built-in attributes.
         Some(BuiltinAttribute { name, template, .. }) if *name != sym::rustc_dummy => {
             match parse_meta(psess, attr) {
@@ -43,6 +42,7 @@ pub fn check_attr(psess: &ParseSess, attr: &Attribute) {
             }
         }
         _ => {
+            let attr_item = attr.get_normal_item();
             if let AttrArgs::Eq { .. } = attr_item.args {
                 // All key-value attributes are restricted to meta-item syntax.
                 match parse_meta(psess, attr) {
@@ -154,11 +154,23 @@ fn is_attr_template_compatible(template: &AttributeTemplate, meta: &ast::MetaIte
     }
 }
 
-pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr: &Attribute) {
+pub fn check_attribute_safety(
+    psess: &ParseSess,
+    builtin_attr_safety: Option<AttributeSafety>,
+    attr: &Attribute,
+    id: NodeId,
+) {
     let attr_item = attr.get_normal_item();
+    match (builtin_attr_safety, attr_item.unsafety) {
+        // - Unsafe builtin attribute
+        // - User wrote `#[unsafe(..)]`, which is permitted on any edition
+        (Some(AttributeSafety::Unsafe { .. }), Safety::Unsafe(..)) => {
+            // OK
+        }
 
-    if let AttributeSafety::Unsafe { unsafe_since } = safety {
-        if let ast::Safety::Default = attr_item.unsafety {
+        // - Unsafe builtin attribute
+        // - User did not write `#[unsafe(..)]`
+        (Some(AttributeSafety::Unsafe { unsafe_since }), Safety::Default) => {
             let path_span = attr_item.path.span;
 
             // If the `attr_item`'s span is not from a macro, then just suggest
@@ -185,7 +197,7 @@ pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr:
                 psess.buffer_lint(
                     UNSAFE_ATTR_OUTSIDE_UNSAFE,
                     path_span,
-                    ast::CRATE_NODE_ID,
+                    id,
                     BuiltinLintDiag::UnsafeAttrOutsideUnsafe {
                         attribute_name_span: path_span,
                         sugg_spans: (diag_span.shrink_to_lo(), diag_span.shrink_to_hi()),
@@ -193,11 +205,38 @@ pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr:
                 );
             }
         }
-    } else if let Safety::Unsafe(unsafe_span) = attr_item.unsafety {
-        psess.dcx().emit_err(errors::InvalidAttrUnsafe {
-            span: unsafe_span,
-            name: attr_item.path.clone(),
-        });
+
+        // - Normal builtin attribute, or any non-builtin attribute
+        // - All non-builtin attributes are currently considered safe; writing `#[unsafe(..)]` is
+        //   not permitted on non-builtin attributes or normal builtin attributes
+        (Some(AttributeSafety::Normal) | None, Safety::Unsafe(unsafe_span)) => {
+            psess.dcx().emit_err(errors::InvalidAttrUnsafe {
+                span: unsafe_span,
+                name: attr_item.path.clone(),
+            });
+        }
+
+        // - Normal builtin attribute
+        // - No explicit `#[unsafe(..)]` written.
+        (Some(AttributeSafety::Normal), Safety::Default) => {
+            // OK
+        }
+
+        // - Non-builtin attribute
+        // - No explicit `#[unsafe(..)]` written.
+        (None, Safety::Default) => {
+            // OK
+        }
+
+        (
+            Some(AttributeSafety::Unsafe { .. } | AttributeSafety::Normal) | None,
+            Safety::Safe(..),
+        ) => {
+            psess.dcx().span_delayed_bug(
+                attr_item.span(),
+                "`check_attribute_safety` does not expect `Safety::Safe` on attributes",
+            );
+        }
     }
 }