diff options
Diffstat (limited to 'compiler/rustc_parse')
| -rw-r--r-- | compiler/rustc_parse/Cargo.toml | 8 | ||||
| -rw-r--r-- | compiler/rustc_parse/messages.ftl | 5 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/errors.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 41 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lib.rs | 45 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 31 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/tests.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/ty.rs | 19 |
10 files changed, 95 insertions, 74 deletions
diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index 7cb4ae7ff5f..6d738a10371 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -5,8 +5,8 @@ edition = "2024" [dependencies] # tidy-alphabetical-start -bitflags.workspace = true -rustc-literal-escaper.workspace = true +bitflags = "2.4.1" +rustc-literal-escaper = "0.0.5" rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_data_structures = { path = "../rustc_data_structures" } @@ -18,8 +18,8 @@ rustc_lexer = { path = "../rustc_lexer" } rustc_macros = { path = "../rustc_macros" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } -thin-vec.workspace = true -tracing.workspace = true +thin-vec = "0.2.12" +tracing = "0.1" unicode-normalization = "0.1.11" unicode-width = "0.2.0" # tidy-alphabetical-end diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 4ca2f57bd87..72cd75f6d89 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -58,7 +58,7 @@ parse_async_use_order_incorrect = the order of `use` and `async` is incorrect parse_at_dot_dot_in_struct_pattern = `@ ..` is not supported in struct patterns .suggestion = bind to each field separately or, if you don't need them, just remove `{$ident} @` -parse_at_in_struct_pattern = Unexpected `@` in struct pattern +parse_at_in_struct_pattern = unexpected `@` in struct pattern .note = struct patterns use `field: pattern` syntax to bind to fields .help = consider replacing `new_name @ field_name` with `field_name: new_name` if that is what you intended @@ -479,9 +479,6 @@ parse_invalid_identifier_with_leading_number = identifiers cannot start with a n parse_invalid_literal_suffix_on_tuple_index = suffixes on a tuple index are invalid .label = invalid suffix `{$suffix}` - .tuple_exception_line_1 = `{$suffix}` is *temporarily* accepted on tuple index fields as it was incorrectly accepted on stable for a few releases - .tuple_exception_line_2 = on proc macros, you'll want to use `syn::Index::from` or `proc_macro::Literal::*_unsuffixed` for code that will desugar to tuple field access - .tuple_exception_line_3 = see issue #60210 <https://github.com/rust-lang/rust/issues/60210> for more information parse_invalid_logical_operator = `{$incorrect}` is not a logical operator .note = unlike in e.g., Python and PHP, `&&` and `||` are used for logical operators diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 797d4830c2f..00ca5acd84d 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -1017,10 +1017,6 @@ pub(crate) struct InvalidLiteralSuffixOnTupleIndex { #[label] pub span: Span, pub suffix: Symbol, - #[help(parse_tuple_exception_line_1)] - #[help(parse_tuple_exception_line_2)] - #[help(parse_tuple_exception_line_3)] - pub exception: bool, } #[derive(Diagnostic)] diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 9792240a548..51019db7c00 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ - Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace, + Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace, }; use rustc_literal_escaper::{EscapeError, Mode, check_for_errors}; use rustc_session::lint::BuiltinLintDiag; @@ -44,19 +44,44 @@ pub(crate) struct UnmatchedDelim { pub candidate_span: Option<Span>, } +/// Which tokens should be stripped before lexing the tokens. +pub enum StripTokens { + /// Strip both shebang and frontmatter. + ShebangAndFrontmatter, + /// Strip the shebang but not frontmatter. + /// + /// That means that char sequences looking like frontmatter are simply + /// interpreted as regular Rust lexemes. + Shebang, + /// Strip nothing. + /// + /// In other words, char sequences looking like a shebang or frontmatter + /// are simply interpreted as regular Rust lexemes. + Nothing, +} + pub(crate) fn lex_token_trees<'psess, 'src>( psess: &'psess ParseSess, mut src: &'src str, mut start_pos: BytePos, override_span: Option<Span>, - frontmatter_allowed: FrontmatterAllowed, + strip_tokens: StripTokens, ) -> Result<TokenStream, Vec<Diag<'psess>>> { - // Skip `#!`, if present. - if let Some(shebang_len) = rustc_lexer::strip_shebang(src) { - src = &src[shebang_len..]; - start_pos = start_pos + BytePos::from_usize(shebang_len); + match strip_tokens { + StripTokens::Shebang | StripTokens::ShebangAndFrontmatter => { + if let Some(shebang_len) = rustc_lexer::strip_shebang(src) { + src = &src[shebang_len..]; + start_pos = start_pos + BytePos::from_usize(shebang_len); + } + } + StripTokens::Nothing => {} } + let frontmatter_allowed = match strip_tokens { + StripTokens::ShebangAndFrontmatter => FrontmatterAllowed::Yes, + StripTokens::Shebang | StripTokens::Nothing => FrontmatterAllowed::No, + }; + let cursor = Cursor::new(src, frontmatter_allowed); let mut lexer = Lexer { psess, @@ -597,7 +622,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let last_line_start = within.rfind('\n').map_or(0, |i| i + 1); let last_line = &within[last_line_start..]; - let last_line_trimmed = last_line.trim_start_matches(is_whitespace); + let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace); let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32); let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos); @@ -640,7 +665,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { }); } - if !rest.trim_matches(is_whitespace).is_empty() { + if !rest.trim_matches(is_horizontal_whitespace).is_empty() { let span = self.mk_sp(last_line_start_pos, self.pos); self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span }); } diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 197333d942d..88b67d792de 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -21,7 +21,6 @@ use rustc_ast::tokenstream::{DelimSpan, TokenStream}; use rustc_ast::{AttrItem, Attribute, MetaItemInner, token}; use rustc_ast_pretty::pprust; use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize}; -use rustc_lexer::FrontmatterAllowed; use rustc_session::parse::ParseSess; use rustc_span::source_map::SourceMap; use rustc_span::{FileName, SourceFile, Span}; @@ -34,6 +33,8 @@ pub mod parser; use parser::Parser; use rustc_ast::token::Delimiter; +use crate::lexer::StripTokens; + pub mod lexer; mod errors; @@ -53,16 +54,18 @@ pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T { } } -/// Creates a new parser from a source string. On failure, the errors must be consumed via -/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are -/// dropped. +/// Creates a new parser from a source string. +/// +/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`, +/// etc., otherwise a panic will occur when they are dropped. pub fn new_parser_from_source_str( psess: &ParseSess, name: FileName, source: String, + strip_tokens: StripTokens, ) -> Result<Parser<'_>, Vec<Diag<'_>>> { let source_file = psess.source_map().new_source_file(name, source); - new_parser_from_source_file(psess, source_file) + new_parser_from_source_file(psess, source_file, strip_tokens) } /// Creates a new parser from a filename. On failure, the errors must be consumed via @@ -73,6 +76,7 @@ pub fn new_parser_from_source_str( pub fn new_parser_from_file<'a>( psess: &'a ParseSess, path: &Path, + strip_tokens: StripTokens, sp: Option<Span>, ) -> Result<Parser<'a>, Vec<Diag<'a>>> { let sm = psess.source_map(); @@ -96,7 +100,7 @@ pub fn new_parser_from_file<'a>( } err.emit(); }); - new_parser_from_source_file(psess, source_file) + new_parser_from_source_file(psess, source_file, strip_tokens) } pub fn utf8_error<E: EmissionGuarantee>( @@ -147,9 +151,10 @@ pub fn utf8_error<E: EmissionGuarantee>( fn new_parser_from_source_file( psess: &ParseSess, source_file: Arc<SourceFile>, + strip_tokens: StripTokens, ) -> Result<Parser<'_>, Vec<Diag<'_>>> { let end_pos = source_file.end_position(); - let stream = source_file_to_stream(psess, source_file, None, FrontmatterAllowed::Yes)?; + let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?; let mut parser = Parser::new(psess, stream, None); if parser.token == token::Eof { parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None); @@ -157,6 +162,9 @@ fn new_parser_from_source_file( Ok(parser) } +/// Given a source string, produces a sequence of token trees. +/// +/// NOTE: This only strips shebangs, not frontmatter! pub fn source_str_to_stream( psess: &ParseSess, name: FileName, @@ -164,18 +172,21 @@ pub fn source_str_to_stream( override_span: Option<Span>, ) -> Result<TokenStream, Vec<Diag<'_>>> { let source_file = psess.source_map().new_source_file(name, source); - // used mainly for `proc_macro` and the likes, not for our parsing purposes, so don't parse - // frontmatters as frontmatters. - source_file_to_stream(psess, source_file, override_span, FrontmatterAllowed::No) + // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them + // in the current edition since that would be breaking. + // See also <https://github.com/rust-lang/rust/issues/145520>. + // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve. + source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang) } -/// Given a source file, produces a sequence of token trees. Returns any buffered errors from -/// parsing the token stream. +/// Given a source file, produces a sequence of token trees. +/// +/// Returns any buffered errors from parsing the token stream. fn source_file_to_stream<'psess>( psess: &'psess ParseSess, source_file: Arc<SourceFile>, override_span: Option<Span>, - frontmatter_allowed: FrontmatterAllowed, + strip_tokens: StripTokens, ) -> Result<TokenStream, Vec<Diag<'psess>>> { let src = source_file.src.as_ref().unwrap_or_else(|| { psess.dcx().bug(format!( @@ -184,13 +195,7 @@ fn source_file_to_stream<'psess>( )); }); - lexer::lex_token_trees( - psess, - src.as_str(), - source_file.start_pos, - override_span, - frontmatter_allowed, - ) + lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens) } /// Runs the given subparser `f` on the tokens of the given `attr`'s item. diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 7de4f6efd0b..81a5d48d94e 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1163,7 +1163,10 @@ impl<'a> Parser<'a> { suffix, }) => { if let Some(suffix) = suffix { - self.expect_no_tuple_index_suffix(current.span, suffix); + self.dcx().emit_err(errors::InvalidLiteralSuffixOnTupleIndex { + span: current.span, + suffix, + }); } match self.break_up_float(symbol, current.span) { // 1e2 @@ -1239,7 +1242,8 @@ impl<'a> Parser<'a> { suffix: Option<Symbol>, ) -> Box<Expr> { if let Some(suffix) = suffix { - self.expect_no_tuple_index_suffix(ident_span, suffix); + self.dcx() + .emit_err(errors::InvalidLiteralSuffixOnTupleIndex { span: ident_span, suffix }); } self.mk_expr(lo.to(ident_span), ExprKind::Field(base, Ident::new(field, ident_span))) } @@ -2225,24 +2229,6 @@ impl<'a> Parser<'a> { }) } - pub(super) fn expect_no_tuple_index_suffix(&self, span: Span, suffix: Symbol) { - if [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suffix) { - // #59553: warn instead of reject out of hand to allow the fix to percolate - // through the ecosystem when people fix their macros - self.dcx().emit_warn(errors::InvalidLiteralSuffixOnTupleIndex { - span, - suffix, - exception: true, - }); - } else { - self.dcx().emit_err(errors::InvalidLiteralSuffixOnTupleIndex { - span, - suffix, - exception: false, - }); - } - } - /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). /// Keep this in sync with `Token::can_begin_literal_maybe_minus`. pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, Box<Expr>> { @@ -2742,7 +2728,7 @@ impl<'a> Parser<'a> { /// The specified `edition` in `let_chains_policy` should be that of the whole `if` construct, /// i.e. the same span we use to later decide whether the drop behaviour should be that of /// edition `..=2021` or that of `2024..`. - // Public because it is used in rustfmt forks such as https://github.com/tucant/rustfmt/blob/30c83df9e1db10007bdd16dafce8a86b404329b2/src/parse/macros/html.rs#L57 for custom if expressions. + // Public to use it for custom `if` expressions in rustfmt forks like https://github.com/tucant/rustfmt pub fn parse_expr_cond( &mut self, let_chains_policy: LetChainsPolicy, @@ -2910,7 +2896,8 @@ impl<'a> Parser<'a> { } } - fn parse_for_head(&mut self) -> PResult<'a, (Box<Pat>, Box<Expr>)> { + // Public to use it for custom `for` expressions in rustfmt forks like https://github.com/tucant/rustfmt + pub fn parse_for_head(&mut self) -> PResult<'a, (Box<Pat>, Box<Expr>)> { let begin_paren = if self.token == token::OpenParen { // Record whether we are about to parse `for (`. // This is used below for recovery in case of `for ( $stuff ) $block` diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 15598f32429..ed4069dae93 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -22,6 +22,8 @@ use std::{fmt, mem, slice}; use attr_wrapper::{AttrWrapper, UsePreAttrPos}; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; +// Public to use it for custom `if` expressions in rustfmt forks like https://github.com/tucant/rustfmt +pub use expr::LetChainsPolicy; pub(crate) use item::{FnContext, FnParseMode}; pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma}; pub use path::PathStyle; @@ -1333,7 +1335,10 @@ impl<'a> Parser<'a> { if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token.kind { if let Some(suffix) = suffix { - self.expect_no_tuple_index_suffix(self.token.span, suffix); + self.dcx().emit_err(errors::InvalidLiteralSuffixOnTupleIndex { + span: self.token.span, + suffix, + }); } self.bump(); Ok(Ident::new(symbol, self.prev_token.span)) diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index 9754691a0b9..c4d30b3d328 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -1516,7 +1516,7 @@ impl<'a> Parser<'a> { || self.check_noexpect(&token::DotDotDot) || self.check_keyword(exp!(Underscore)) { - etc = PatFieldsRest::Rest; + etc = PatFieldsRest::Rest(self.token.span); let mut etc_sp = self.token.span; if first_etc_and_maybe_comma_span.is_none() { if let Some(comma_tok) = diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index a6e7266e71b..e645fb47b9e 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -22,6 +22,7 @@ use rustc_span::{ }; use termcolor::WriteColor; +use crate::lexer::StripTokens; use crate::parser::{ForceCollect, Parser}; use crate::{new_parser_from_source_str, source_str_to_stream, unwrap_or_emit_fatal}; @@ -35,6 +36,7 @@ fn string_to_parser(psess: &ParseSess, source_str: String) -> Parser<'_> { psess, PathBuf::from("bogofile").into(), source_str, + StripTokens::Nothing, )) } @@ -2240,7 +2242,7 @@ fn parse_item_from_source_str( source: String, psess: &ParseSess, ) -> PResult<'_, Option<Box<ast::Item>>> { - unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, source)) + unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, source, StripTokens::Nothing)) .parse_item(ForceCollect::No) } @@ -2520,7 +2522,8 @@ fn ttdelim_span() { source: String, psess: &ParseSess, ) -> PResult<'_, Box<ast::Expr>> { - unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, source)).parse_expr() + unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, source, StripTokens::Nothing)) + .parse_expr() } create_default_session_globals_then(|| { diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index 6168647183f..23aaafac934 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -92,10 +92,10 @@ fn can_continue_type_after_non_fn_ident(t: &Token) -> bool { } fn can_begin_dyn_bound_in_edition_2015(t: &Token) -> bool { - // `Not`, `Tilde` & `Const` are deliberately not part of this list to + // `!`, `const`, `[`, `async` are deliberately not part of this list to // contain the number of potential regressions esp. in MBE code. - // `Const` would regress `rfc-2632-const-trait-impl/mbe-dyn-const-2015.rs`. - // `Not` would regress `dyn!(...)` macro calls in Rust 2015. + // `const` and `[` would regress UI test `macro-dyn-const-2015.rs`. + // `!` would regress `dyn!(...)` macro calls in Rust 2015. t.is_path_start() || t.is_lifetime() || t == &TokenKind::Question @@ -1015,12 +1015,18 @@ impl<'a> Parser<'a> { || self.check(exp!(Tilde)) || self.check_keyword(exp!(For)) || self.check(exp!(OpenParen)) - || self.check(exp!(OpenBracket)) + || self.can_begin_maybe_const_bound() || self.check_keyword(exp!(Const)) || self.check_keyword(exp!(Async)) || self.check_keyword(exp!(Use)) } + fn can_begin_maybe_const_bound(&mut self) -> bool { + self.check(exp!(OpenBracket)) + && self.look_ahead(1, |t| t.is_keyword(kw::Const)) + && self.look_ahead(2, |t| *t == token::CloseBracket) + } + /// Parse a bound. /// /// ```ebnf @@ -1199,10 +1205,7 @@ impl<'a> Parser<'a> { let span = tilde.to(self.prev_token.span); self.psess.gated_spans.gate(sym::const_trait_impl, span); BoundConstness::Maybe(span) - } else if self.check(exp!(OpenBracket)) - && self.look_ahead(1, |t| t.is_keyword(kw::Const)) - && self.look_ahead(2, |t| *t == token::CloseBracket) - { + } else if self.can_begin_maybe_const_bound() { let start = self.token.span; self.bump(); self.expect_keyword(exp!(Const)).unwrap(); |
