From be023ebe850261c6bb202a02a686827d821c3697 Mon Sep 17 00:00:00 2001 From: Mazdak Farrokhzad Date: Thu, 10 Oct 2019 10:26:10 +0200 Subject: move config.rs to libsyntax_expand --- src/libsyntax/parse/parser/attr.rs | 2 +- src/libsyntax/parse/parser/module.rs | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/libsyntax/parse/parser/attr.rs b/src/libsyntax/parse/parser/attr.rs index 31f0a02a483..0f9e573af82 100644 --- a/src/libsyntax/parse/parser/attr.rs +++ b/src/libsyntax/parse/parser/attr.rs @@ -268,7 +268,7 @@ impl<'a> Parser<'a> { } /// Parses `cfg_attr(pred, attr_item_list)` where `attr_item_list` is comma-delimited. - crate fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> { + pub fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> { self.expect(&token::OpenDelim(token::Paren))?; let cfg_predicate = self.parse_meta_item()?; diff --git a/src/libsyntax/parse/parser/module.rs b/src/libsyntax/parse/parser/module.rs index 3e5974c2eee..ad72b3a1dea 100644 --- a/src/libsyntax/parse/parser/module.rs +++ b/src/libsyntax/parse/parser/module.rs @@ -7,8 +7,8 @@ use crate::ast::{self, Ident, Attribute, ItemKind, Mod, Crate}; use crate::parse::{new_sub_parser_from_file, DirectoryOwnership}; use crate::token::{self, TokenKind}; use crate::source_map::{SourceMap, Span, DUMMY_SP, FileName}; -use crate::symbol::sym; +use syntax_pos::symbol::sym; use errors::PResult; use std::path::{self, Path, PathBuf}; @@ -39,17 +39,12 @@ impl<'a> Parser<'a> { /// Parses a `mod <foo> { ... }` or `mod <foo>;` item. pub(super) fn parse_item_mod(&mut self, outer_attrs: &[Attribute]) -> PResult<'a, ItemInfo> { - let (in_cfg, outer_attrs) = { - // FIXME(Centril): This results in a cycle between config and parsing. - // Consider using dynamic dispatch via `self.sess` to disentangle the knot. - let mut strip_unconfigured = crate::config::StripUnconfigured { - sess: self.sess, - features: None, // Don't perform gated feature checking. - }; - let mut outer_attrs = outer_attrs.to_owned(); - strip_unconfigured.process_cfg_attrs(&mut outer_attrs); - (!self.cfg_mods || strip_unconfigured.in_cfg(&outer_attrs), outer_attrs) - }; + // HACK(Centril): See documentation on `ParseSess::process_cfg_mod`.
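The new code path below replaces the inlined `StripUnconfigured` logic with an indirect call through a hook stored on `ParseSess`, which is what breaks the config/parsing crate cycle described in the removed FIXME. A minimal sketch of the shape such a hook could take, inferred from the call site on the next line; the alias name and exact signature are assumptions, not the patch's own definition:

```
use syntax::ast::Attribute;
use syntax::sess::ParseSess;

// Inferred from `(self.sess.process_cfg_mod)(self.sess, self.cfg_mods, outer_attrs)`:
// given the session, the `cfg_mods` flag, and a module's outer attributes, return
// whether the module is in-config plus the attributes with `cfg_attr` processed.
type ProcessCfgMod = fn(&ParseSess, bool, &[Attribute]) -> (bool, Vec<Attribute>);
```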
+ let (in_cfg, outer_attrs) = (self.sess.process_cfg_mod)( + self.sess, + self.cfg_mods, + outer_attrs, + ); let id_span = self.token.span; let id = self.parse_ident()?; -- cgit 1.4.1-3-g733a5 From 4ae2728fa8052915414127dce28245eb8f70842a Mon Sep 17 00:00:00 2001 From: Mazdak Farrokhzad Date: Tue, 15 Oct 2019 22:48:13 +0200 Subject: move syntax::parse -> librustc_parse also move MACRO_ARGUMENTS -> librustc_parse --- Cargo.lock | 26 +- src/librustc_driver/Cargo.toml | 1 + src/librustc_driver/lib.rs | 17 +- src/librustc_interface/Cargo.toml | 1 + src/librustc_interface/interface.rs | 2 +- src/librustc_interface/passes.rs | 15 +- src/librustc_interface/util.rs | 1 + src/librustc_lexer/src/lib.rs | 2 +- src/librustc_metadata/Cargo.toml | 1 + src/librustc_metadata/rmeta/decoder/cstore_impl.rs | 4 +- src/librustc_parse/Cargo.toml | 21 + src/librustc_parse/error_codes.rs | 174 ++ src/librustc_parse/lexer/mod.rs | 643 ++++++ src/librustc_parse/lexer/tokentrees.rs | 280 +++ .../lexer/unescape_error_reporting.rs | 215 ++ src/librustc_parse/lexer/unicode_chars.rs | 392 ++++ src/librustc_parse/lib.rs | 423 ++++ src/librustc_parse/parser/attr.rs | 351 +++ src/librustc_parse/parser/diagnostics.rs | 1549 ++++++++++++++ src/librustc_parse/parser/expr.rs | 1963 +++++++++++++++++ src/librustc_parse/parser/generics.rs | 308 +++ src/librustc_parse/parser/item.rs | 2238 ++++++++++++++++++++ src/librustc_parse/parser/mod.rs | 1393 ++++++++++++ src/librustc_parse/parser/module.rs | 316 +++ src/librustc_parse/parser/pat.rs | 1015 +++++++++ src/librustc_parse/parser/path.rs | 497 +++++ src/librustc_parse/parser/stmt.rs | 482 +++++ src/librustc_parse/parser/ty.rs | 460 ++++ src/librustc_parse/validate_attr.rs | 111 + src/librustc_passes/Cargo.toml | 5 +- src/librustc_passes/ast_validation.rs | 4 +- src/librustc_save_analysis/Cargo.toml | 1 + src/librustc_save_analysis/span_utils.rs | 6 +- src/librustdoc/html/highlight.rs | 4 +- src/librustdoc/lib.rs | 1 + src/librustdoc/passes/check_code_block_syntax.rs | 2 +- src/librustdoc/test.rs | 5 +- src/libsyntax/ast.rs | 6 +- src/libsyntax/attr/builtin.rs | 6 +- src/libsyntax/attr/mod.rs | 2 +- src/libsyntax/error_codes.rs | 165 -- src/libsyntax/lib.rs | 7 +- src/libsyntax/parse/lexer/mod.rs | 643 ------ src/libsyntax/parse/lexer/tokentrees.rs | 280 --- .../parse/lexer/unescape_error_reporting.rs | 215 -- src/libsyntax/parse/lexer/unicode_chars.rs | 392 ---- src/libsyntax/parse/mod.rs | 420 ---- src/libsyntax/parse/parser/attr.rs | 358 ---- src/libsyntax/parse/parser/diagnostics.rs | 1547 -------------- src/libsyntax/parse/parser/expr.rs | 1964 ----------------- src/libsyntax/parse/parser/generics.rs | 309 --- src/libsyntax/parse/parser/item.rs | 2237 ------------------- src/libsyntax/parse/parser/mod.rs | 1391 ------------ src/libsyntax/parse/parser/module.rs | 315 --- src/libsyntax/parse/parser/pat.rs | 1016 --------- src/libsyntax/parse/parser/path.rs | 497 ----- src/libsyntax/parse/parser/stmt.rs | 480 ----- src/libsyntax/parse/parser/ty.rs | 458 ---- src/libsyntax/parse/validate_attr.rs | 112 - src/libsyntax/print/pprust.rs | 14 +- src/libsyntax/sess.rs | 4 +- src/libsyntax/token.rs | 44 +- src/libsyntax/util/comments.rs | 4 +- src/libsyntax/util/literal.rs | 10 +- src/libsyntax/util/parser.rs | 4 +- src/libsyntax_expand/Cargo.toml | 5 +- src/libsyntax_expand/base.rs | 8 +- src/libsyntax_expand/config.rs | 6 +- src/libsyntax_expand/expand.rs | 6 +- src/libsyntax_expand/mbe/macro_parser.rs | 6 +- src/libsyntax_expand/mbe/macro_rules.rs | 10 +- 
src/libsyntax_expand/parse/lexer/tests.rs | 4 +- src/libsyntax_expand/parse/tests.rs | 2 +- src/libsyntax_expand/proc_macro.rs | 9 +- src/libsyntax_expand/proc_macro_server.rs | 10 +- src/libsyntax_expand/tests.rs | 2 +- src/libsyntax_ext/Cargo.toml | 1 + src/libsyntax_ext/assert.rs | 4 +- src/libsyntax_ext/cmdline_attrs.rs | 3 +- src/libsyntax_ext/source_util.rs | 10 +- src/libsyntax_ext/util.rs | 2 +- src/test/ui-fulldeps/ast_stmt_expr_attr.rs | 7 +- src/test/ui-fulldeps/mod_dir_path_canonicalized.rs | 3 +- src/test/ui-fulldeps/pprust-expr-roundtrip.rs | 3 +- 84 files changed, 12993 insertions(+), 12937 deletions(-) create mode 100644 src/librustc_parse/Cargo.toml create mode 100644 src/librustc_parse/error_codes.rs create mode 100644 src/librustc_parse/lexer/mod.rs create mode 100644 src/librustc_parse/lexer/tokentrees.rs create mode 100644 src/librustc_parse/lexer/unescape_error_reporting.rs create mode 100644 src/librustc_parse/lexer/unicode_chars.rs create mode 100644 src/librustc_parse/lib.rs create mode 100644 src/librustc_parse/parser/attr.rs create mode 100644 src/librustc_parse/parser/diagnostics.rs create mode 100644 src/librustc_parse/parser/expr.rs create mode 100644 src/librustc_parse/parser/generics.rs create mode 100644 src/librustc_parse/parser/item.rs create mode 100644 src/librustc_parse/parser/mod.rs create mode 100644 src/librustc_parse/parser/module.rs create mode 100644 src/librustc_parse/parser/pat.rs create mode 100644 src/librustc_parse/parser/path.rs create mode 100644 src/librustc_parse/parser/stmt.rs create mode 100644 src/librustc_parse/parser/ty.rs create mode 100644 src/librustc_parse/validate_attr.rs delete mode 100644 src/libsyntax/parse/lexer/mod.rs delete mode 100644 src/libsyntax/parse/lexer/tokentrees.rs delete mode 100644 src/libsyntax/parse/lexer/unescape_error_reporting.rs delete mode 100644 src/libsyntax/parse/lexer/unicode_chars.rs delete mode 100644 src/libsyntax/parse/mod.rs delete mode 100644 src/libsyntax/parse/parser/attr.rs delete mode 100644 src/libsyntax/parse/parser/diagnostics.rs delete mode 100644 src/libsyntax/parse/parser/expr.rs delete mode 100644 src/libsyntax/parse/parser/generics.rs delete mode 100644 src/libsyntax/parse/parser/item.rs delete mode 100644 src/libsyntax/parse/parser/mod.rs delete mode 100644 src/libsyntax/parse/parser/module.rs delete mode 100644 src/libsyntax/parse/parser/pat.rs delete mode 100644 src/libsyntax/parse/parser/path.rs delete mode 100644 src/libsyntax/parse/parser/stmt.rs delete mode 100644 src/libsyntax/parse/parser/ty.rs delete mode 100644 src/libsyntax/parse/validate_attr.rs (limited to 'src/libsyntax/parse/parser') diff --git a/Cargo.lock b/Cargo.lock index 0f770f3eadb..7e51f96cfb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3504,6 +3504,7 @@ dependencies = [ "rustc_lint", "rustc_metadata", "rustc_mir", + "rustc_parse", "rustc_plugin", "rustc_plugin_impl", "rustc_save_analysis", @@ -3571,6 +3572,7 @@ dependencies = [ "rustc_lint", "rustc_metadata", "rustc_mir", + "rustc_parse", "rustc_passes", "rustc_plugin_impl", "rustc_privacy", @@ -3648,6 +3650,7 @@ dependencies = [ "rustc_data_structures", "rustc_errors", "rustc_index", + "rustc_parse", "rustc_target", "serialize", "smallvec 1.0.0", @@ -3691,6 +3694,21 @@ dependencies = [ "core", ] +[[package]] +name = "rustc_parse" +version = "0.0.0" +dependencies = [ + "bitflags", + "log", + "rustc_data_structures", + "rustc_errors", + "rustc_lexer", + "rustc_target", + "smallvec 1.0.0", + "syntax", + "syntax_pos", +] + [[package]] name = "rustc_passes" 
version = "0.0.0" @@ -3700,6 +3718,7 @@ dependencies = [ "rustc_data_structures", "rustc_errors", "rustc_index", + "rustc_parse", "rustc_target", "syntax", "syntax_pos", @@ -3762,6 +3781,7 @@ dependencies = [ "rustc", "rustc_codegen_utils", "rustc_data_structures", + "rustc_parse", "serde_json", "syntax", "syntax_pos", @@ -4371,14 +4391,11 @@ dependencies = [ name = "syntax_expand" version = "0.0.0" dependencies = [ - "bitflags", - "lazy_static 1.3.0", "log", "rustc_data_structures", "rustc_errors", - "rustc_index", "rustc_lexer", - "scoped-tls", + "rustc_parse", "serialize", "smallvec 1.0.0", "syntax", @@ -4393,6 +4410,7 @@ dependencies = [ "log", "rustc_data_structures", "rustc_errors", + "rustc_parse", "rustc_target", "smallvec 1.0.0", "syntax", diff --git a/src/librustc_driver/Cargo.toml b/src/librustc_driver/Cargo.toml index a9e4e6db1c7..19726d6aff2 100644 --- a/src/librustc_driver/Cargo.toml +++ b/src/librustc_driver/Cargo.toml @@ -21,6 +21,7 @@ rustc_data_structures = { path = "../librustc_data_structures" } errors = { path = "../librustc_errors", package = "rustc_errors" } rustc_metadata = { path = "../librustc_metadata" } rustc_mir = { path = "../librustc_mir" } +rustc_parse = { path = "../librustc_parse" } rustc_plugin = { path = "../librustc_plugin/deprecated" } # To get this in the sysroot rustc_plugin_impl = { path = "../librustc_plugin" } rustc_save_analysis = { path = "../librustc_save_analysis" } diff --git a/src/librustc_driver/lib.rs b/src/librustc_driver/lib.rs index 611b891d99a..380cbed4b21 100644 --- a/src/librustc_driver/lib.rs +++ b/src/librustc_driver/lib.rs @@ -65,7 +65,6 @@ use std::time::Instant; use syntax::ast; use syntax::source_map::FileLoader; use syntax::feature_gate::{GatedCfg, UnstableFeatures}; -use syntax::parse; use syntax::symbol::sym; use syntax_pos::{DUMMY_SP, FileName}; @@ -1096,14 +1095,16 @@ pub fn handle_options(args: &[String]) -> Option { } fn parse_crate_attrs<'a>(sess: &'a Session, input: &Input) -> PResult<'a, Vec> { - match *input { - Input::File(ref ifile) => { - parse::parse_crate_attrs_from_file(ifile, &sess.parse_sess) + match input { + Input::File(ifile) => { + rustc_parse::parse_crate_attrs_from_file(ifile, &sess.parse_sess) } - Input::Str { ref name, ref input } => { - parse::parse_crate_attrs_from_source_str(name.clone(), - input.clone(), - &sess.parse_sess) + Input::Str { name, input } => { + rustc_parse::parse_crate_attrs_from_source_str( + name.clone(), + input.clone(), + &sess.parse_sess, + ) } } } diff --git a/src/librustc_interface/Cargo.toml b/src/librustc_interface/Cargo.toml index 35b93db1d65..de59882bbdf 100644 --- a/src/librustc_interface/Cargo.toml +++ b/src/librustc_interface/Cargo.toml @@ -16,6 +16,7 @@ smallvec = { version = "1.0", features = ["union", "may_dangle"] } syntax = { path = "../libsyntax" } syntax_ext = { path = "../libsyntax_ext" } syntax_expand = { path = "../libsyntax_expand" } +rustc_parse = { path = "../librustc_parse" } syntax_pos = { path = "../libsyntax_pos" } rustc_serialize = { path = "../libserialize", package = "serialize" } rustc = { path = "../librustc" } diff --git a/src/librustc_interface/interface.rs b/src/librustc_interface/interface.rs index 61f30392e06..02068b2ce38 100644 --- a/src/librustc_interface/interface.rs +++ b/src/librustc_interface/interface.rs @@ -11,11 +11,11 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend; use rustc_data_structures::OnDrop; use rustc_data_structures::sync::Lrc; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use 
rustc_parse::new_parser_from_source_str; use std::path::PathBuf; use std::result; use std::sync::{Arc, Mutex}; use syntax::ast::{self, MetaItemKind}; -use syntax::parse::new_parser_from_source_str; use syntax::token; use syntax::source_map::{FileName, FileLoader, SourceMap}; use syntax::sess::ParseSess; diff --git a/src/librustc_interface/passes.rs b/src/librustc_interface/passes.rs index c874e94124d..0e38de9a0ed 100644 --- a/src/librustc_interface/passes.rs +++ b/src/librustc_interface/passes.rs @@ -26,6 +26,7 @@ use rustc_errors::PResult; use rustc_incremental; use rustc_metadata::cstore; use rustc_mir as mir; +use rustc_parse::{parse_crate_from_file, parse_crate_from_source_str}; use rustc_passes::{self, ast_validation, hir_stats, layout_test}; use rustc_plugin as plugin; use rustc_plugin::registry::Registry; @@ -37,7 +38,6 @@ use syntax::{self, ast, visit}; use syntax::early_buffered_lints::BufferedEarlyLint; use syntax_expand::base::{NamedSyntaxExtension, ExtCtxt}; use syntax::mut_visit::MutVisitor; -use syntax::parse; use syntax::util::node_count::NodeCounter; use syntax::symbol::Symbol; use syntax_pos::FileName; @@ -60,12 +60,11 @@ pub fn parse<'a>(sess: &'a Session, input: &Input) -> PResult<'a, ast::Crate> { let krate = time(sess, "parsing", || { let _prof_timer = sess.prof.generic_activity("parse_crate"); - match *input { - Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess), - Input::Str { - ref input, - ref name, - } => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess), + match input { + Input::File(file) => parse_crate_from_file(file, &sess.parse_sess), + Input::Str { input, name } => { + parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess) + } } })?; @@ -484,7 +483,7 @@ pub fn lower_to_hir( ) -> Result { // Lower AST to HIR. let hir_forest = time(sess, "lowering AST -> HIR", || { - let nt_to_tokenstream = syntax::parse::nt_to_tokenstream; + let nt_to_tokenstream = rustc_parse::nt_to_tokenstream; let hir_crate = lower_crate(sess, &dep_graph, &krate, resolver, nt_to_tokenstream); if sess.opts.debugging_opts.hir_stats { diff --git a/src/librustc_interface/util.rs b/src/librustc_interface/util.rs index c02e5b9ae28..1cdb0ac87c5 100644 --- a/src/librustc_interface/util.rs +++ b/src/librustc_interface/util.rs @@ -50,6 +50,7 @@ pub fn diagnostics_registry() -> Registry { // FIXME: need to figure out a way to get these back in here // all_errors.extend_from_slice(get_codegen_backend(sess).diagnostics()); all_errors.extend_from_slice(&rustc_metadata::error_codes::DIAGNOSTICS); + all_errors.extend_from_slice(&rustc_parse::error_codes::DIAGNOSTICS); all_errors.extend_from_slice(&rustc_passes::error_codes::DIAGNOSTICS); all_errors.extend_from_slice(&rustc_plugin::error_codes::DIAGNOSTICS); all_errors.extend_from_slice(&rustc_mir::error_codes::DIAGNOSTICS); diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index c50808adec1..3cecb4317b1 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -1,7 +1,7 @@ //! Low-level Rust lexer. //! //! Tokens produced by this lexer are not yet ready for parsing the Rust syntax, -//! for that see `libsyntax::parse::lexer`, which converts this basic token stream +//! for that see `librustc_parse::lexer`, which converts this basic token stream //! into wide tokens used by actual parser. //! //! 
The purpose of this crate is to convert raw sources into a labeled sequence diff --git a/src/librustc_metadata/Cargo.toml b/src/librustc_metadata/Cargo.toml index 5bc047e001b..b1431563f21 100644 --- a/src/librustc_metadata/Cargo.toml +++ b/src/librustc_metadata/Cargo.toml @@ -23,4 +23,5 @@ rustc_serialize = { path = "../libserialize", package = "serialize" } stable_deref_trait = "1.0.0" syntax = { path = "../libsyntax" } syntax_expand = { path = "../libsyntax_expand" } +rustc_parse = { path = "../librustc_parse" } syntax_pos = { path = "../libsyntax_pos" } diff --git a/src/librustc_metadata/rmeta/decoder/cstore_impl.rs b/src/librustc_metadata/rmeta/decoder/cstore_impl.rs index 6eacfc28de2..19cfdeac57e 100644 --- a/src/librustc_metadata/rmeta/decoder/cstore_impl.rs +++ b/src/librustc_metadata/rmeta/decoder/cstore_impl.rs @@ -18,6 +18,8 @@ use rustc::hir::map::{DefKey, DefPath, DefPathHash}; use rustc::hir::map::definitions::DefPathTable; use rustc::util::nodemap::DefIdMap; use rustc_data_structures::svh::Svh; +use rustc_parse::source_file_to_stream; +use rustc_parse::parser::emit_unclosed_delims; use smallvec::SmallVec; use std::any::Any; @@ -27,8 +29,6 @@ use std::sync::Arc; use syntax::ast; use syntax::attr; use syntax::source_map; -use syntax::parse::source_file_to_stream; -use syntax::parse::parser::emit_unclosed_delims; use syntax::source_map::Spanned; use syntax::symbol::Symbol; use syntax_pos::{Span, FileName}; diff --git a/src/librustc_parse/Cargo.toml b/src/librustc_parse/Cargo.toml new file mode 100644 index 00000000000..4579f9d472d --- /dev/null +++ b/src/librustc_parse/Cargo.toml @@ -0,0 +1,21 @@ +[package] +authors = ["The Rust Project Developers"] +name = "rustc_parse" +version = "0.0.0" +edition = "2018" + +[lib] +name = "rustc_parse" +path = "lib.rs" +doctest = false + +[dependencies] +bitflags = "1.0" +log = "0.4" +syntax_pos = { path = "../libsyntax_pos" } +syntax = { path = "../libsyntax" } +errors = { path = "../librustc_errors", package = "rustc_errors" } +rustc_data_structures = { path = "../librustc_data_structures" } +rustc_lexer = { path = "../librustc_lexer" } +rustc_target = { path = "../librustc_target" } +smallvec = { version = "1.0", features = ["union", "may_dangle"] } diff --git a/src/librustc_parse/error_codes.rs b/src/librustc_parse/error_codes.rs new file mode 100644 index 00000000000..cf74e09a377 --- /dev/null +++ b/src/librustc_parse/error_codes.rs @@ -0,0 +1,174 @@ +// Error messages for EXXXX errors. +// Each message should start and end with a new line, and be wrapped to 80 +// characters. In vim you can `:set tw=80` and use `gq` to wrap paragraphs. Use +// `:set tw=0` to disable. +syntax::register_diagnostics! { + +E0178: r##" +In types, the `+` type operator has low precedence, so it is often necessary +to use parentheses. + +For example: + +```compile_fail,E0178 +trait Foo {} + +struct Bar<'a> { + w: &'a Foo + Copy, // error, use &'a (Foo + Copy) + x: &'a Foo + 'a, // error, use &'a (Foo + 'a) + y: &'a mut Foo + 'a, // error, use &'a mut (Foo + 'a) + z: fn() -> Foo + 'a, // error, use fn() -> (Foo + 'a) +} +``` + +More details can be found in [RFC 438]. + +[RFC 438]: https://github.com/rust-lang/rfcs/pull/438 +"##, + +E0583: r##" +A file wasn't found for an out-of-line module. + +Erroneous code example: + +```ignore (compile_fail not working here; see Issue #43707) +mod file_that_doesnt_exist; // error: file not found for module + +fn main() {} +``` + +Please be sure that a file corresponding to the module exists. 
If you +want to use a module named `file_that_doesnt_exist`, you need to have a file +named `file_that_doesnt_exist.rs` or `file_that_doesnt_exist/mod.rs` in the +same directory. +"##, + +E0584: r##" +A doc comment that is not attached to anything has been encountered. + +Erroneous code example: + +```compile_fail,E0584 +trait Island { + fn lost(); + + /// I'm lost! +} +``` + +A little reminder: a doc comment has to be placed before the item it's supposed +to document. So if you want to document the `Island` trait, you need to put a +doc comment before it, not inside it. Same goes for the `lost` method: the doc +comment needs to be before it: + +``` +/// I'm THE island! +trait Island { + /// I'm lost! + fn lost(); +} +``` +"##, + +E0585: r##" +A documentation comment that doesn't document anything was found. + +Erroneous code example: + +```compile_fail,E0585 +fn main() { + // The following doc comment will fail: + /// This is a useless doc comment! +} +``` + +Documentation comments need to be followed by items, including functions, +types, modules, etc. Examples: + +``` +/// I'm documenting the following struct: +struct Foo; + +/// I'm documenting the following function: +fn foo() {} +``` +"##, + +E0586: r##" +An inclusive range was used with no end. + +Erroneous code example: + +```compile_fail,E0586 +fn main() { + let tmp = vec![0, 1, 2, 3, 4, 4, 3, 3, 2, 1]; + let x = &tmp[1..=]; // error: inclusive range was used with no end +} +``` + +An inclusive range needs an end in order to *include* it. If you just need a +start and no end, use a non-inclusive range (with `..`): + +``` +fn main() { + let tmp = vec![0, 1, 2, 3, 4, 4, 3, 3, 2, 1]; + let x = &tmp[1..]; // ok! +} +``` + +Or put an end to your inclusive range: + +``` +fn main() { + let tmp = vec![0, 1, 2, 3, 4, 4, 3, 3, 2, 1]; + let x = &tmp[1..=3]; // ok! +} +``` +"##, + +E0704: r##" +This error indicates that a incorrect visibility restriction was specified. + +Example of erroneous code: + +```compile_fail,E0704 +mod foo { + pub(foo) struct Bar { + x: i32 + } +} +``` + +To make struct `Bar` only visible in module `foo` the `in` keyword should be +used: +``` +mod foo { + pub(in crate::foo) struct Bar { + x: i32 + } +} +# fn main() {} +``` + +For more information see the Rust Reference on [Visibility]. + +[Visibility]: https://doc.rust-lang.org/reference/visibility-and-privacy.html +"##, + +E0743: r##" +C-variadic has been used on a non-foreign function. + +Erroneous code example: + +```compile_fail,E0743 +fn foo2(x: u8, ...) {} // error! +``` + +Only foreign functions can use C-variadic (`...`). It is used to give an +undefined number of parameters to a given function (like `printf` in C). The +equivalent in Rust would be to use macros directly. 
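To make the suggested alternative concrete, here is a sketch of a macro taking any number of arguments (illustrative, not part of the original error-index text):

```
macro_rules! print_all {
    // A macro can accept a variable number of arguments, which is the
    // idiomatic stand-in for C's `...`.
    ($($arg:expr),*) => {
        $( println!("{}", $arg); )*
    };
}

fn main() {
    print_all!(1, "two", 3.0);
}
```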
+"##, + +; + +} diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs new file mode 100644 index 00000000000..5de63cb39d1 --- /dev/null +++ b/src/librustc_parse/lexer/mod.rs @@ -0,0 +1,643 @@ +use syntax::token::{self, Token, TokenKind}; +use syntax::sess::ParseSess; +use syntax::symbol::{sym, Symbol}; +use syntax::util::comments; + +use errors::{FatalError, DiagnosticBuilder}; +use syntax_pos::{BytePos, Pos, Span}; +use rustc_lexer::Base; +use rustc_lexer::unescape; + +use std::char; +use std::convert::TryInto; +use rustc_data_structures::sync::Lrc; +use log::debug; + +mod tokentrees; +mod unicode_chars; +mod unescape_error_reporting; +use unescape_error_reporting::{emit_unescape_error, push_escaped_char}; + +#[derive(Clone, Debug)] +pub struct UnmatchedBrace { + pub expected_delim: token::DelimToken, + pub found_delim: Option, + pub found_span: Span, + pub unclosed_span: Option, + pub candidate_span: Option, +} + +pub struct StringReader<'a> { + sess: &'a ParseSess, + /// Initial position, read-only. + start_pos: BytePos, + /// The absolute offset within the source_map of the current character. + // FIXME(#64197): `pub` is needed by tests for now. + pub pos: BytePos, + /// Stop reading src at this index. + end_src_index: usize, + /// Source text to tokenize. + src: Lrc, + override_span: Option, +} + +impl<'a> StringReader<'a> { + pub fn new(sess: &'a ParseSess, + source_file: Lrc, + override_span: Option) -> Self { + if source_file.src.is_none() { + sess.span_diagnostic.bug(&format!("cannot lex `source_file` without source: {}", + source_file.name)); + } + + let src = (*source_file.src.as_ref().unwrap()).clone(); + + StringReader { + sess, + start_pos: source_file.start_pos, + pos: source_file.start_pos, + end_src_index: src.len(), + src, + override_span, + } + } + + pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self { + let begin = sess.source_map().lookup_byte_offset(span.lo()); + let end = sess.source_map().lookup_byte_offset(span.hi()); + + // Make the range zero-length if the span is invalid. + if begin.sf.start_pos != end.sf.start_pos { + span = span.shrink_to_lo(); + } + + let mut sr = StringReader::new(sess, begin.sf, None); + + // Seek the lexer to the right byte range. + sr.end_src_index = sr.src_index(span.hi()); + + sr + } + + + fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { + self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) + } + + /// Returns the next token, including trivia like whitespace or comments. + /// + /// `Err(())` means that some errors were encountered, which can be + /// retrieved using `buffer_fatal_errors`. 
+ pub fn next_token(&mut self) -> Token { + let start_src_index = self.src_index(self.pos); + let text: &str = &self.src[start_src_index..self.end_src_index]; + + if text.is_empty() { + let span = self.mk_sp(self.pos, self.pos); + return Token::new(token::Eof, span); + } + + { + let is_beginning_of_file = self.pos == self.start_pos; + if is_beginning_of_file { + if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(shebang_len); + + let sym = self.symbol_from(start + BytePos::from_usize("#!".len())); + let kind = token::Shebang(sym); + + let span = self.mk_sp(start, self.pos); + return Token::new(kind, span); + } + } + } + + let token = rustc_lexer::first_token(text); + + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(token.len); + + debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start)); + + // This could use `?`, but that makes code significantly (10-20%) slower. + // https://github.com/rust-lang/rust/issues/37939 + let kind = self.cook_lexer_token(token.kind, start); + + let span = self.mk_sp(start, self.pos); + Token::new(kind, span) + } + + /// Report a fatal lexical error with a given span. + fn fatal_span(&self, sp: Span, m: &str) -> FatalError { + self.sess.span_diagnostic.span_fatal(sp, m) + } + + /// Report a lexical error with a given span. + fn err_span(&self, sp: Span, m: &str) { + self.sess.span_diagnostic.struct_span_err(sp, m).emit(); + } + + + /// Report a fatal error spanning [`from_pos`, `to_pos`). + fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError { + self.fatal_span(self.mk_sp(from_pos, to_pos), m) + } + + /// Report a lexical error spanning [`from_pos`, `to_pos`). + fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) { + self.err_span(self.mk_sp(from_pos, to_pos), m) + } + + fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str) + -> DiagnosticBuilder<'a> + { + self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m) + } + + fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) + -> DiagnosticBuilder<'a> + { + let mut m = m.to_string(); + m.push_str(": "); + push_escaped_char(&mut m, c); + + self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..]) + } + + /// Turns simple `rustc_lexer::TokenKind` enum into a rich + /// `libsyntax::TokenKind`. This turns strings into interned + /// symbols and runs additional validation. + fn cook_lexer_token( + &self, + token: rustc_lexer::TokenKind, + start: BytePos, + ) -> TokenKind { + match token { + rustc_lexer::TokenKind::LineComment => { + let string = self.str_from(start); + // comments with only more "/"s are not doc comments + let tok = if comments::is_line_doc_comment(string) { + self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment"); + token::DocComment(Symbol::intern(string)) + } else { + token::Comment + }; + + tok + } + rustc_lexer::TokenKind::BlockComment { terminated } => { + let string = self.str_from(start); + // block comments starting with "/**" or "/*!" 
are doc-comments + // but comments with only "*"s between two "/"s are not + let is_doc_comment = comments::is_block_doc_comment(string); + + if !terminated { + let msg = if is_doc_comment { + "unterminated block doc-comment" + } else { + "unterminated block comment" + }; + let last_bpos = self.pos; + self.fatal_span_(start, last_bpos, msg).raise(); + } + + let tok = if is_doc_comment { + self.forbid_bare_cr(start, + string, + "bare CR not allowed in block doc-comment"); + token::DocComment(Symbol::intern(string)) + } else { + token::Comment + }; + + tok + } + rustc_lexer::TokenKind::Whitespace => token::Whitespace, + rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => { + let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; + let mut ident_start = start; + if is_raw_ident { + ident_start = ident_start + BytePos(2); + } + // FIXME: perform NFKC normalization here. (Issue #2253) + let sym = self.symbol_from(ident_start); + if is_raw_ident { + let span = self.mk_sp(start, self.pos); + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + self.sess.raw_identifier_spans.borrow_mut().push(span); + } + token::Ident(sym, is_raw_ident) + } + rustc_lexer::TokenKind::Literal { kind, suffix_start } => { + let suffix_start = start + BytePos(suffix_start as u32); + let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + let suffix = if suffix_start < self.pos { + let string = self.str_from(suffix_start); + if string == "_" { + self.sess.span_diagnostic + .struct_span_warn(self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed") + .warn("this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!") + .note("for more information, see issue #42326 \ + ") + .emit(); + None + } else { + Some(Symbol::intern(string)) + } + } else { + None + }; + token::Literal(token::Lit { kind, symbol, suffix }) + } + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. 
+ let lifetime_name = self.str_from(start); + if starts_with_number { + self.err_span_( + start, + self.pos, + "lifetimes cannot start with a number", + ); + } + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident) + } + rustc_lexer::TokenKind::Semi => token::Semi, + rustc_lexer::TokenKind::Comma => token::Comma, + rustc_lexer::TokenKind::Dot => token::Dot, + rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren), + rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren), + rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace), + rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace), + rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket), + rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket), + rustc_lexer::TokenKind::At => token::At, + rustc_lexer::TokenKind::Pound => token::Pound, + rustc_lexer::TokenKind::Tilde => token::Tilde, + rustc_lexer::TokenKind::Question => token::Question, + rustc_lexer::TokenKind::Colon => token::Colon, + rustc_lexer::TokenKind::Dollar => token::Dollar, + rustc_lexer::TokenKind::Eq => token::Eq, + rustc_lexer::TokenKind::Not => token::Not, + rustc_lexer::TokenKind::Lt => token::Lt, + rustc_lexer::TokenKind::Gt => token::Gt, + rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), + rustc_lexer::TokenKind::And => token::BinOp(token::And), + rustc_lexer::TokenKind::Or => token::BinOp(token::Or), + rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), + rustc_lexer::TokenKind::Star => token::BinOp(token::Star), + rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), + rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), + rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), + + rustc_lexer::TokenKind::Unknown => { + let c = self.str_from(start).chars().next().unwrap(); + let mut err = self.struct_fatal_span_char(start, + self.pos, + "unknown start of token", + c); + // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, + // instead of keeping a table in `check_for_substitution`into the token. Ideally, + // this should be inside `rustc_lexer`. However, we should first remove compound + // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, + // as there will be less overall work to do this way. 
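The substitution table lives in `unicode_chars.rs` (its `UNICODE_ARRAY` entries appear near the end of this patch). A minimal sketch of the lookup idea, using a hypothetical helper name and a two-entry excerpt of the real table:

```
// Each entry: (confusable character, its Unicode name, the intended ASCII character).
const CONFUSABLES: &[(char, &str, char)] = &[
    ('–', "En Dash", '-'),
    ('‚', "Single Low-9 Quotation Mark", ','),
];

// Hypothetical helper: find the ASCII character a confusable resembles.
fn ascii_substitution(c: char) -> Option<(&'static str, char)> {
    CONFUSABLES.iter()
        .find(|&&(confusable, _, _)| confusable == c)
        .map(|&(_, name, ascii)| (name, ascii))
}
```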
+ let token = unicode_chars::check_for_substitution(self, start, c, &mut err) + .unwrap_or_else(|| token::Unknown(self.symbol_from(start))); + err.emit(); + token + } + } + } + + fn cook_lexer_literal( + &self, + start: BytePos, + suffix_start: BytePos, + kind: rustc_lexer::LiteralKind + ) -> (token::LitKind, Symbol) { + match kind { + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, + "unterminated character literal".into()) + .raise() + } + let content_start = start + BytePos(1); + let content_end = suffix_start - BytePos(1); + self.validate_char_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Char, id) + }, + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + self.fatal_span_(start + BytePos(1), suffix_start, + "unterminated byte constant".into()) + .raise() + } + let content_start = start + BytePos(2); + let content_end = suffix_start - BytePos(1); + self.validate_byte_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Byte, id) + }, + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, + "unterminated double quote string".into()) + .raise() + } + let content_start = start + BytePos(1); + let content_end = suffix_start - BytePos(1); + self.validate_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Str, id) + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + self.fatal_span_(start + BytePos(1), suffix_start, + "unterminated double quote byte string".into()) + .raise() + } + let content_start = start + BytePos(2); + let content_end = suffix_start - BytePos(1); + self.validate_byte_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::ByteStr, id) + } + rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + let n = u32::from(n_hashes); + let content_start = start + BytePos(2 + n); + let content_end = suffix_start - BytePos(1 + n); + self.validate_raw_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::StrRaw(n_hashes), id) + } + rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + let n = u32::from(n_hashes); + let content_start = start + BytePos(3 + n); + let content_end = suffix_start - BytePos(1 + n); + self.validate_raw_byte_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::ByteStrRaw(n_hashes), id) + } + rustc_lexer::LiteralKind::Int { base, empty_int } => { + if empty_int { + self.err_span_(start, suffix_start, "no valid digits found for number"); + (token::Integer, sym::integer(0)) + } else { + self.validate_int_literal(base, start, suffix_start); + (token::Integer, self.symbol_from_to(start, suffix_start)) + } + }, + rustc_lexer::LiteralKind::Float { base, empty_exponent } => { + if empty_exponent { + let mut err = 
self.struct_span_fatal( + start, self.pos, + "expected at least one digit in exponent" + ); + err.emit(); + } + + match base { + Base::Hexadecimal => { + self.err_span_(start, suffix_start, + "hexadecimal float literal is not supported") + } + Base::Octal => { + self.err_span_(start, suffix_start, + "octal float literal is not supported") + } + Base::Binary => { + self.err_span_(start, suffix_start, + "binary float literal is not supported") + } + _ => () + } + + let id = self.symbol_from_to(start, suffix_start); + (token::Float, id) + }, + } + } + + #[inline] + fn src_index(&self, pos: BytePos) -> usize { + (pos - self.start_pos).to_usize() + } + + /// Slice of the source text from `start` up to but excluding `self.pos`, + /// meaning the slice does not include the character `self.ch`. + fn str_from(&self, start: BytePos) -> &str + { + self.str_from_to(start, self.pos) + } + + /// Creates a Symbol from a given offset to the current offset. + fn symbol_from(&self, start: BytePos) -> Symbol { + debug!("taking an ident from {:?} to {:?}", start, self.pos); + Symbol::intern(self.str_from(start)) + } + + /// As symbol_from, with an explicit endpoint. + fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol { + debug!("taking an ident from {:?} to {:?}", start, end); + Symbol::intern(self.str_from_to(start, end)) + } + + /// Slice of the source text spanning from `start` up to but excluding `end`. + fn str_from_to(&self, start: BytePos, end: BytePos) -> &str + { + &self.src[self.src_index(start)..self.src_index(end)] + } + + fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) { + let mut idx = 0; + loop { + idx = match s[idx..].find('\r') { + None => break, + Some(it) => idx + it + 1 + }; + self.err_span_(start + BytePos(idx as u32 - 1), + start + BytePos(idx as u32), + errmsg); + } + } + + fn report_non_started_raw_string(&self, start: BytePos) -> ! { + let bad_char = self.str_from(start).chars().last().unwrap(); + self + .struct_fatal_span_char( + start, + self.pos, + "found invalid character; only `#` is allowed \ + in raw string delimitation", + bad_char, + ) + .emit(); + FatalError.raise() + } + + fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! 
{ + let mut err = self.struct_span_fatal( + start, start, + "unterminated raw string", + ); + err.span_label( + self.mk_sp(start, start), + "unterminated raw string", + ); + + if n_hashes > 0 { + err.note(&format!("this raw string should be terminated with `\"{}`", + "#".repeat(n_hashes as usize))); + } + + err.emit(); + FatalError.raise() + } + + fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 { + match n_hashes.try_into() { + Ok(n_hashes) => n_hashes, + Err(_) => { + self.fatal_span_(start, + self.pos, + "too many `#` symbols: raw strings may be \ + delimited by up to 65535 `#` symbols").raise(); + } + } + } + + fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_char(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Char, + 0..off, + err, + ) + } + } + + fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_byte(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Byte, + 0..off, + err, + ) + } + } + + fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) + } + }) + } + + fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) + } + }) + } + + fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) + } + + fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) + } + + fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { + let base = match base { + Base::Binary => 2, + Base::Octal => 8, + _ => return, + }; + let s = self.str_from_to(content_start + BytePos(2), content_end); + for (idx, c) in s.char_indices() { + let idx = idx as u32; + if c != '_' && c.to_digit(base).is_none() { + let lo = content_start + BytePos(2 + idx); + let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); + self.err_span_(lo, hi, + &format!("invalid digit for a 
base {} literal", base)); + + } + } + } +} diff --git a/src/librustc_parse/lexer/tokentrees.rs b/src/librustc_parse/lexer/tokentrees.rs new file mode 100644 index 00000000000..1353591308b --- /dev/null +++ b/src/librustc_parse/lexer/tokentrees.rs @@ -0,0 +1,280 @@ +use rustc_data_structures::fx::FxHashMap; +use syntax_pos::Span; + +use super::{StringReader, UnmatchedBrace}; + +use syntax::print::pprust::token_to_string; +use syntax::token::{self, Token}; +use syntax::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint}; + +use errors::PResult; + +impl<'a> StringReader<'a> { + crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec) { + let mut tt_reader = TokenTreesReader { + string_reader: self, + token: Token::dummy(), + joint_to_prev: Joint, + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + last_delim_empty_block_spans: FxHashMap::default() + }; + let res = tt_reader.parse_all_token_trees(); + (res, tt_reader.unmatched_braces) + } +} + +struct TokenTreesReader<'a> { + string_reader: StringReader<'a>, + token: Token, + joint_to_prev: IsJoint, + /// Stack of open delimiters and their spans. Used for error message. + open_braces: Vec<(token::DelimToken, Span)>, + unmatched_braces: Vec, + /// The type and spans for all braces + /// + /// Used only for error recovery when arriving to EOF with mismatched braces. + matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, + last_unclosed_found_span: Option, + last_delim_empty_block_spans: FxHashMap +} + +impl<'a> TokenTreesReader<'a> { + // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. + fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + let mut buf = TokenStreamBuilder::default(); + + self.real_token(); + while self.token != token::Eof { + buf.push(self.parse_token_tree()?); + } + + Ok(buf.into_token_stream()) + } + + // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. + fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { + let mut buf = TokenStreamBuilder::default(); + loop { + if let token::CloseDelim(..) 
= self.token.kind { + return buf.into_token_stream(); + } + + match self.parse_token_tree() { + Ok(tree) => buf.push(tree), + Err(mut e) => { + e.emit(); + return buf.into_token_stream(); + } + } + } + } + + fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> { + let sm = self.string_reader.sess.source_map(); + match self.token.kind { + token::Eof => { + let msg = "this file contains an un-closed delimiter"; + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.token.span, msg); + for &(_, sp) in &self.open_braces { + err.span_label(sp, "un-closed delimiter"); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: token::DelimToken::Brace, + found_delim: None, + found_span: self.token.span, + unclosed_span: Some(sp), + candidate_span: None, + }); + } + + if let Some((delim, _)) = self.open_braces.last() { + if let Some((_, open_sp, close_sp)) = self.matching_delim_spans.iter() + .filter(|(d, open_sp, close_sp)| { + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } + } + false + }).next() // these are in reverse order as they get inserted on close, but + { // we want the last open/first close + err.span_label( + *open_sp, + "this delimiter might not be properly closed...", + ); + err.span_label( + *close_sp, + "...as it matches this but it has different indentation", + ); + } + } + Err(err) + }, + token::OpenDelim(delim) => { + // The span for beginning of the delimited section + let pre_span = self.token.span; + + // Parse the open delimiter. + self.open_braces.push((delim, self.token.span)); + self.real_token(); + + // Parse the token trees within the delimiters. + // We stop at any delimiter so we can try to recover if the user + // uses an incorrect delimiter. + let tts = self.parse_token_trees_until_close_delim(); + + // Expand to cover the entire delimited token tree + let delim_span = DelimSpan::from_pair(pre_span, self.token.span); + + match self.token.kind { + // Correct delimiter. + token::CloseDelim(d) if d == delim => { + let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); + let close_brace_span = self.token.span; + + if tts.is_empty() { + let empty_block_span = open_brace_span.to(close_brace_span); + self.last_delim_empty_block_spans.insert(delim, empty_block_span); + } + + if self.open_braces.len() == 0 { + // Clear up these spans to avoid suggesting them as we've found + // properly matched delimiters so far for an entire block. + self.matching_delim_spans.clear(); + } else { + self.matching_delim_spans.push( + (open_brace, open_brace_span, close_brace_span), + ); + } + // Parse the close delimiter. + self.real_token(); + } + // Incorrect delimiter. + token::CloseDelim(other) => { + let mut unclosed_delimiter = None; + let mut candidate = None; + if self.last_unclosed_found_span != Some(self.token.span) { + // do not complain about the same unclosed delimiter multiple times + self.last_unclosed_found_span = Some(self.token.span); + // This is a conservative error: only report the last unclosed + // delimiter. The previous unclosed delimiters could actually be + // closed! The parser just hasn't gotten to them yet. 
+ if let Some(&(_, sp)) = self.open_braces.last() { + unclosed_delimiter = Some(sp); + }; + if let Some(current_padding) = sm.span_to_margin(self.token.span) { + for (brace, brace_span) in &self.open_braces { + if let Some(padding) = sm.span_to_margin(*brace_span) { + // high likelihood of these two corresponding + if current_padding == padding && brace == &other { + candidate = Some(*brace_span); + } + } + } + } + let (tok, _) = self.open_braces.pop().unwrap(); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: tok, + found_delim: Some(other), + found_span: self.token.span, + unclosed_span: unclosed_delimiter, + candidate_span: candidate, + }); + } else { + self.open_braces.pop(); + } + + // If the incorrect delimiter matches an earlier opening + // delimiter, then don't consume it (it can be used to + // close the earlier one). Otherwise, consume it. + // E.g., we try to recover from: + // fn foo() { + // bar(baz( + // } // Incorrect delimiter but matches the earlier `{` + if !self.open_braces.iter().any(|&(b, _)| b == other) { + self.real_token(); + } + } + token::Eof => { + // Silently recover, the EOF token will be seen again + // and an error emitted then. Thus we don't pop from + // self.open_braces here. + }, + _ => {} + } + + Ok(TokenTree::Delimited( + delim_span, + delim, + tts.into() + ).into()) + }, + token::CloseDelim(delim) => { + // An unexpected closing delimiter (i.e., there is no + // matching opening delimiter). + let token_str = token_to_string(&self.token); + let msg = format!("unexpected close delimiter: `{}`", token_str); + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.token.span, &msg); + + if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) { + err.span_label( + span, + "this block is empty, you might have not meant to close it" + ); + } + err.span_label(self.token.span, "unexpected close delimiter"); + Err(err) + }, + _ => { + let tt = TokenTree::Token(self.token.take()); + self.real_token(); + let is_joint = self.joint_to_prev == Joint && self.token.is_op(); + Ok((tt, if is_joint { Joint } else { NonJoint })) + } + } + } + + fn real_token(&mut self) { + self.joint_to_prev = Joint; + loop { + let token = self.string_reader.next_token(); + match token.kind { + token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => { + self.joint_to_prev = NonJoint; + } + _ => { + self.token = token; + return; + } + } + } + } +} + +#[derive(Default)] +struct TokenStreamBuilder { + buf: Vec, +} + +impl TokenStreamBuilder { + fn push(&mut self, (tree, joint): TreeAndJoint) { + if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() { + if let TokenTree::Token(token) = &tree { + if let Some(glued) = prev_token.glue(token) { + self.buf.pop(); + self.buf.push((TokenTree::Token(glued), joint)); + return; + } + } + } + self.buf.push((tree, joint)) + } + + fn into_token_stream(self) -> TokenStream { + TokenStream::new(self.buf) + } +} diff --git a/src/librustc_parse/lexer/unescape_error_reporting.rs b/src/librustc_parse/lexer/unescape_error_reporting.rs new file mode 100644 index 00000000000..a5749d07e62 --- /dev/null +++ b/src/librustc_parse/lexer/unescape_error_reporting.rs @@ -0,0 +1,215 @@ +//! Utilities for rendering escape sequence errors as diagnostics. 
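The division of labor here: `rustc_lexer::unescape` reports a `(byte offset, EscapeError)` pair against the literal body, and this module converts that into a spanned diagnostic. A small illustrative check of the error half of that contract (a sketch, not code from the patch):

```
use rustc_lexer::unescape::{unescape_char, EscapeError};

fn demo() {
    // The body `\q` is an unknown escape; `unescape_char` reports the error
    // with a byte offset into the body, which `emit_unescape_error` below
    // maps back onto a source span.
    match unescape_char("\\q") {
        Err((_offset, err)) => assert!(err == EscapeError::InvalidEscape),
        Ok(_) => unreachable!(),
    }
}
```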
+ +use std::ops::Range; +use std::iter::once; + +use rustc_lexer::unescape::{EscapeError, Mode}; +use syntax_pos::{Span, BytePos}; + +use syntax::errors::{Handler, Applicability}; + +pub(crate) fn emit_unescape_error( + handler: &Handler, + // interior part of the literal, without quotes + lit: &str, + // full span of the literal, including quotes + span_with_quotes: Span, + mode: Mode, + // range of the error inside `lit` + range: Range, + error: EscapeError, +) { + log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", + lit, span_with_quotes, mode, range, error); + let span = { + let Range { start, end } = range; + let (start, end) = (start as u32, end as u32); + let lo = span_with_quotes.lo() + BytePos(start + 1); + let hi = lo + BytePos(end - start); + span_with_quotes + .with_lo(lo) + .with_hi(hi) + }; + let last_char = || { + let c = lit[range.clone()].chars().rev().next().unwrap(); + let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); + (c, span) + }; + match error { + EscapeError::LoneSurrogateUnicodeEscape => { + handler.struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must not be a surrogate") + .emit(); + } + EscapeError::OutOfRangeUnicodeEscape => { + handler.struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must be at most 10FFFF") + .emit(); + } + EscapeError::MoreThanOneChar => { + let msg = if mode.is_bytes() { + "if you meant to write a byte string literal, use double quotes" + } else { + "if you meant to write a `str` literal, use double quotes" + }; + + handler + .struct_span_err( + span_with_quotes, + "character literal may only contain one codepoint", + ) + .span_suggestion( + span_with_quotes, + msg, + format!("\"{}\"", lit), + Applicability::MachineApplicable, + ).emit() + } + EscapeError::EscapeOnlyChar => { + let (c, _span) = last_char(); + + let mut msg = if mode.is_bytes() { + "byte constant must be escaped: " + } else { + "character constant must be escaped: " + }.to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::BareCarriageReturn => { + let msg = if mode.in_double_quotes() { + "bare CR not allowed in string, use \\r instead" + } else { + "character constant must be escaped: \\r" + }; + handler.span_err(span, msg); + } + EscapeError::BareCarriageReturnInRawString => { + assert!(mode.in_double_quotes()); + let msg = "bare CR not allowed in raw string"; + handler.span_err(span, msg); + } + EscapeError::InvalidEscape => { + let (c, span) = last_char(); + + let label = if mode.is_bytes() { + "unknown byte escape" + } else { + "unknown character escape" + }; + let mut msg = label.to_string(); + msg.push_str(": "); + push_escaped_char(&mut msg, c); + + let mut diag = handler.struct_span_err(span, msg.as_str()); + diag.span_label(span, label); + if c == '{' || c == '}' && !mode.is_bytes() { + diag.help("if used in a formatting string, \ + curly braces are escaped with `{{` and `}}`"); + } else if c == '\r' { + diag.help("this is an isolated carriage return; \ + consider checking your editor and version control settings"); + } + diag.emit(); + } + EscapeError::TooShortHexEscape => { + handler.span_err(span, "numeric character escape is too short") + } + EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { + let (c, span) = last_char(); + + let mut msg = if error == EscapeError::InvalidCharInHexEscape { + "invalid character in numeric character escape: " + } else { + "invalid character in 
unicode escape: " + }.to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::NonAsciiCharInByte => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "byte constant must be ASCII. \ + Use a \\xHH escape for a non-ASCII byte") + } + EscapeError::NonAsciiCharInByteString => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "raw byte string must be ASCII") + } + EscapeError::OutOfRangeHexEscape => { + handler.span_err(span, "this form of character escape may only be used \ + with characters in the range [\\x00-\\x7f]") + } + EscapeError::LeadingUnderscoreUnicodeEscape => { + let (_c, span) = last_char(); + handler.span_err(span, "invalid start of unicode escape") + } + EscapeError::OverlongUnicodeEscape => { + handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)") + } + EscapeError::UnclosedUnicodeEscape => { + handler.span_err(span, "unterminated unicode escape (needed a `}`)") + } + EscapeError::NoBraceInUnicodeEscape => { + let msg = "incorrect unicode escape sequence"; + let mut diag = handler.struct_span_err(span, msg); + + let mut suggestion = "\\u{".to_owned(); + let mut suggestion_len = 0; + let (c, char_span) = last_char(); + let chars = once(c).chain(lit[range.end..].chars()); + for c in chars.take(6).take_while(|c| c.is_digit(16)) { + suggestion.push(c); + suggestion_len += c.len_utf8(); + } + + if suggestion_len > 0 { + suggestion.push('}'); + let lo = char_span.lo(); + let hi = lo + BytePos(suggestion_len as u32); + diag.span_suggestion( + span.with_lo(lo).with_hi(hi), + "format of unicode escape sequences uses braces", + suggestion, + Applicability::MaybeIncorrect, + ); + } else { + diag.span_label(span, msg); + diag.help( + "format of unicode escape sequences is `\\u{...}`", + ); + } + + diag.emit(); + } + EscapeError::UnicodeEscapeInByte => { + handler.span_err(span, "unicode escape sequences cannot be used \ + as a byte or in a byte string") + } + EscapeError::EmptyUnicodeEscape => { + handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)") + } + EscapeError::ZeroChars => { + handler.span_err(span, "empty character literal") + } + EscapeError::LoneSlash => { + handler.span_err(span, "invalid trailing slash in literal") + } + } +} + +/// Pushes a character to a message string for error reporting +pub(crate) fn push_escaped_char(msg: &mut String, c: char) { + match c { + '\u{20}'..='\u{7e}' => { + // Don't escape \, ' or " for user-facing messages + msg.push(c); + } + _ => { + msg.extend(c.escape_default()); + } + } +} diff --git a/src/librustc_parse/lexer/unicode_chars.rs b/src/librustc_parse/lexer/unicode_chars.rs new file mode 100644 index 00000000000..6eb995b61d3 --- /dev/null +++ b/src/librustc_parse/lexer/unicode_chars.rs @@ -0,0 +1,392 @@ +// Characters and their corresponding confusables were collected from +// http://www.unicode.org/Public/security/10.0.0/confusables.txt + +use super::StringReader; +use errors::{Applicability, DiagnosticBuilder}; +use syntax_pos::{BytePos, Pos, Span, symbol::kw}; +use crate::token; + +#[rustfmt::skip] // for line breaks +const UNICODE_ARRAY: &[(char, &str, char)] = &[ + ('
', "Line Separator", ' '), + ('
', "Paragraph Separator", ' '), + (' ', "Ogham Space mark", ' '), + (' ', "En Quad", ' '), + (' ', "Em Quad", ' '), + (' ', "En Space", ' '), + (' ', "Em Space", ' '), + (' ', "Three-Per-Em Space", ' '), + (' ', "Four-Per-Em Space", ' '), + (' ', "Six-Per-Em Space", ' '), + (' ', "Punctuation Space", ' '), + (' ', "Thin Space", ' '), + (' ', "Hair Space", ' '), + (' ', "Medium Mathematical Space", ' '), + (' ', "No-Break Space", ' '), + (' ', "Figure Space", ' '), + (' ', "Narrow No-Break Space", ' '), + (' ', "Ideographic Space", ' '), + + ('ߺ', "Nko Lajanyalan", '_'), + ('﹍', "Dashed Low Line", '_'), + ('﹎', "Centreline Low Line", '_'), + ('﹏', "Wavy Low Line", '_'), + ('_', "Fullwidth Low Line", '_'), + + ('‐', "Hyphen", '-'), + ('‑', "Non-Breaking Hyphen", '-'), + ('‒', "Figure Dash", '-'), + ('–', "En Dash", '-'), + ('—', "Em Dash", '-'), + ('﹘', "Small Em Dash", '-'), + ('۔', "Arabic Full Stop", '-'), + ('⁃', "Hyphen Bullet", '-'), + ('˗', "Modifier Letter Minus Sign", '-'), + ('−', "Minus Sign", '-'), + ('➖', "Heavy Minus Sign", '-'), + ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), + ('-', "Fullwidth Hyphen-Minus", '-'), + ('―', "Horizontal Bar", '-'), + ('─', "Box Drawings Light Horizontal", '-'), + ('━', "Box Drawings Heavy Horizontal", '-'), + ('㇐', "CJK Stroke H", '-'), + ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'), + ('ᅳ', "Hangul Jungseong Eu", '-'), + ('ㅡ', "Hangul Letter Eu", '-'), + ('一', "CJK Unified Ideograph-4E00", '-'), + ('⼀', "Kangxi Radical One", '-'), + + ('؍', "Arabic Date Separator", ','), + ('٫', "Arabic Decimal Separator", ','), + ('‚', "Single Low-9 Quotation Mark", ','), + ('¸', "Cedilla", ','), + ('ꓹ', "Lisu Letter Tone Na Po", ','), + (',', "Fullwidth Comma", ','), + + (';', "Greek Question Mark", ';'), + (';', "Fullwidth Semicolon", ';'), + ('︔', "Presentation Form For Vertical Semicolon", ';'), + + ('ः', "Devanagari Sign Visarga", ':'), + ('ઃ', "Gujarati Sign Visarga", ':'), + (':', "Fullwidth Colon", ':'), + ('։', "Armenian Full Stop", ':'), + ('܃', "Syriac Supralinear Colon", ':'), + ('܄', "Syriac Sublinear Colon", ':'), + ('᛬', "Runic Multiple Punctuation", ':'), + ('︰', "Presentation Form For Vertical Two Dot Leader", ':'), + ('᠃', "Mongolian Full Stop", ':'), + ('᠉', "Mongolian Manchu Full Stop", ':'), + ('⁚', "Two Dot Punctuation", ':'), + ('׃', "Hebrew Punctuation Sof Pasuq", ':'), + ('˸', "Modifier Letter Raised Colon", ':'), + ('꞉', "Modifier Letter Colon", ':'), + ('∶', "Ratio", ':'), + ('ː', "Modifier Letter Triangular Colon", ':'), + ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'), + ('︓', "Presentation Form For Vertical Colon", ':'), + + ('!', "Fullwidth Exclamation Mark", '!'), + ('ǃ', "Latin Letter Retroflex Click", '!'), + ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'), + ('︕', "Presentation Form For Vertical Exclamation Mark", '!'), + + ('ʔ', "Latin Letter Glottal Stop", '?'), + ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), + ('ॽ', "Devanagari Letter Glottal Stop", '?'), + ('Ꭾ', "Cherokee Letter He", '?'), + ('ꛫ', "Bamum Letter Ntuu", '?'), + ('?', "Fullwidth Question Mark", '?'), + ('︖', "Presentation Form For Vertical Question Mark", '?'), + + ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), + ('․', "One Dot Leader", '.'), + ('܁', "Syriac Supralinear Full Stop", '.'), + ('܂', "Syriac Sublinear Full Stop", '.'), + ('꘎', "Vai Full Stop", '.'), + ('𐩐', "Kharoshthi Punctuation Dot", '.'), + ('٠', "Arabic-Indic Digit Zero", '.'), + ('۰', "Extended Arabic-Indic Digit Zero", '.'), + ('ꓸ', 
"Lisu Letter Tone Mya Ti", '.'), + ('·', "Middle Dot", '.'), + ('・', "Katakana Middle Dot", '.'), + ('・', "Halfwidth Katakana Middle Dot", '.'), + ('᛫', "Runic Single Punctuation", '.'), + ('·', "Greek Ano Teleia", '.'), + ('⸱', "Word Separator Middle Dot", '.'), + ('𐄁', "Aegean Word Separator Dot", '.'), + ('•', "Bullet", '.'), + ('‧', "Hyphenation Point", '.'), + ('∙', "Bullet Operator", '.'), + ('⋅', "Dot Operator", '.'), + ('ꞏ', "Latin Letter Sinological Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('.', "Fullwidth Full Stop", '.'), + ('。', "Ideographic Full Stop", '.'), + ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'), + + ('՝', "Armenian Comma", '\''), + (''', "Fullwidth Apostrophe", '\''), + ('‘', "Left Single Quotation Mark", '\''), + ('’', "Right Single Quotation Mark", '\''), + ('‛', "Single High-Reversed-9 Quotation Mark", '\''), + ('′', "Prime", '\''), + ('‵', "Reversed Prime", '\''), + ('՚', "Armenian Apostrophe", '\''), + ('׳', "Hebrew Punctuation Geresh", '\''), + ('`', "Grave Accent", '\''), + ('`', "Greek Varia", '\''), + ('`', "Fullwidth Grave Accent", '\''), + ('´', "Acute Accent", '\''), + ('΄', "Greek Tonos", '\''), + ('´', "Greek Oxia", '\''), + ('᾽', "Greek Koronis", '\''), + ('᾿', "Greek Psili", '\''), + ('῾', "Greek Dasia", '\''), + ('ʹ', "Modifier Letter Prime", '\''), + ('ʹ', "Greek Numeral Sign", '\''), + ('ˈ', "Modifier Letter Vertical Line", '\''), + ('ˊ', "Modifier Letter Acute Accent", '\''), + ('ˋ', "Modifier Letter Grave Accent", '\''), + ('˴', "Modifier Letter Middle Grave Accent", '\''), + ('ʻ', "Modifier Letter Turned Comma", '\''), + ('ʽ', "Modifier Letter Reversed Comma", '\''), + ('ʼ', "Modifier Letter Apostrophe", '\''), + ('ʾ', "Modifier Letter Right Half Ring", '\''), + ('ꞌ', "Latin Small Letter Saltillo", '\''), + ('י', "Hebrew Letter Yod", '\''), + ('ߴ', "Nko High Tone Apostrophe", '\''), + ('ߵ', "Nko Low Tone Apostrophe", '\''), + ('ᑊ', "Canadian Syllabics West-Cree P", '\''), + ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''), + ('𖽑', "Miao Sign Aspiration", '\''), + ('𖽒', "Miao Sign Reformed Voicing", '\''), + + ('᳓', "Vedic Sign Nihshvasa", '"'), + ('"', "Fullwidth Quotation Mark", '"'), + ('“', "Left Double Quotation Mark", '"'), + ('”', "Right Double Quotation Mark", '"'), + ('‟', "Double High-Reversed-9 Quotation Mark", '"'), + ('″', "Double Prime", '"'), + ('‶', "Reversed Double Prime", '"'), + ('〃', "Ditto Mark", '"'), + ('״', "Hebrew Punctuation Gershayim", '"'), + ('˝', "Double Acute Accent", '"'), + ('ʺ', "Modifier Letter Double Prime", '"'), + ('˶', "Modifier Letter Middle Double Acute Accent", '"'), + ('˵', "Modifier Letter Middle Double Grave Accent", '"'), + ('ˮ', "Modifier Letter Double Apostrophe", '"'), + ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), + ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), + ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), + + ('(', "Fullwidth Left Parenthesis", '('), + ('❨', "Medium Left Parenthesis Ornament", '('), + ('﴾', "Ornate Left Parenthesis", '('), + + (')', "Fullwidth Right Parenthesis", ')'), + ('❩', "Medium Right Parenthesis Ornament", ')'), + ('﴿', "Ornate Right Parenthesis", ')'), + + ('[', "Fullwidth Left Square Bracket", '['), + ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('「', "Left Corner Bracket", '['), + ('『', "Left White Corner Bracket", '['), + ('【', "Left Black Lenticular Bracket", '['), + ('〔', "Left Tortoise Shell Bracket", '['), + ('〖', 
"Left White Lenticular Bracket", '['), + ('〘', "Left White Tortoise Shell Bracket", '['), + ('〚', "Left White Square Bracket", '['), + + (']', "Fullwidth Right Square Bracket", ']'), + ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('」', "Right Corner Bracket", ']'), + ('』', "Right White Corner Bracket", ']'), + ('】', "Right Black Lenticular Bracket", ']'), + ('〕', "Right Tortoise Shell Bracket", ']'), + ('〗', "Right White Lenticular Bracket", ']'), + ('〙', "Right White Tortoise Shell Bracket", ']'), + ('〛', "Right White Square Bracket", ']'), + + ('❴', "Medium Left Curly Bracket Ornament", '{'), + ('𝄔', "Musical Symbol Brace", '{'), + ('{', "Fullwidth Left Curly Bracket", '{'), + + ('❵', "Medium Right Curly Bracket Ornament", '}'), + ('}', "Fullwidth Right Curly Bracket", '}'), + + ('⁎', "Low Asterisk", '*'), + ('٭', "Arabic Five Pointed Star", '*'), + ('∗', "Asterisk Operator", '*'), + ('𐌟', "Old Italic Letter Ess", '*'), + ('*', "Fullwidth Asterisk", '*'), + + ('᜵', "Philippine Single Punctuation", '/'), + ('⁁', "Caret Insertion Point", '/'), + ('∕', "Division Slash", '/'), + ('⁄', "Fraction Slash", '/'), + ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'), + ('⟋', "Mathematical Rising Diagonal", '/'), + ('⧸', "Big Solidus", '/'), + ('𝈺', "Greek Instrumental Notation Symbol-47", '/'), + ('㇓', "CJK Stroke Sp", '/'), + ('〳', "Vertical Kana Repeat Mark Upper Half", '/'), + ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), + ('ノ', "Katakana Letter No", '/'), + ('丿', "CJK Unified Ideograph-4E3F", '/'), + ('⼃', "Kangxi Radical Slash", '/'), + ('/', "Fullwidth Solidus", '/'), + + ('\', "Fullwidth Reverse Solidus", '\\'), + ('﹨', "Small Reverse Solidus", '\\'), + ('∖', "Set Minus", '\\'), + ('⟍', "Mathematical Falling Diagonal", '\\'), + ('⧵', "Reverse Solidus Operator", '\\'), + ('⧹', "Big Reverse Solidus", '\\'), + ('⧹', "Greek Vocal Notation Symbol-16", '\\'), + ('⧹', "Greek Instrumental Symbol-48", '\\'), + ('㇔', "CJK Stroke D", '\\'), + ('丶', "CJK Unified Ideograph-4E36", '\\'), + ('⼂', "Kangxi Radical Dot", '\\'), + ('、', "Ideographic Comma", '\\'), + ('ヽ', "Katakana Iteration Mark", '\\'), + + ('ꝸ', "Latin Small Letter Um", '&'), + ('&', "Fullwidth Ampersand", '&'), + + ('᛭', "Runic Cross Punctuation", '+'), + ('➕', "Heavy Plus Sign", '+'), + ('𐊛', "Lycian Letter H", '+'), + ('﬩', "Hebrew Letter Alternative Plus Sign", '+'), + ('+', "Fullwidth Plus Sign", '+'), + + ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), + ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), + ('˂', "Modifier Letter Left Arrowhead", '<'), + ('𝈶', "Greek Instrumental Symbol-40", '<'), + ('ᐸ', "Canadian Syllabics Pa", '<'), + ('ᚲ', "Runic Letter Kauna", '<'), + ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'), + ('⟨', "Mathematical Left Angle Bracket", '<'), + ('〈', "Left-Pointing Angle Bracket", '<'), + ('〈', "Left Angle Bracket", '<'), + ('㇛', "CJK Stroke Pd", '<'), + ('く', "Hiragana Letter Ku", '<'), + ('𡿨', "CJK Unified Ideograph-21FE8", '<'), + ('《', "Left Double Angle Bracket", '<'), + ('<', "Fullwidth Less-Than Sign", '<'), + + ('᐀', "Canadian Syllabics Hyphen", '='), + ('⹀', "Double Hyphen", '='), + ('゠', "Katakana-Hiragana Double Hyphen", '='), + ('꓿', "Lisu Punctuation Full Stop", '='), + ('=', "Fullwidth Equals Sign", '='), + + ('›', "Single Right-Pointing Angle Quotation Mark", '>'), + ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), + ('˃', "Modifier Letter Right Arrowhead", '>'), + ('𝈷', "Greek Instrumental Symbol-42", 
'>'),
+    ('ᐳ', "Canadian Syllabics Po", '>'),
+    ('𖼿', "Miao Letter Archaic Zza", '>'),
+    ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
+    ('⟩', "Mathematical Right Angle Bracket", '>'),
+    ('〉', "Right-Pointing Angle Bracket", '>'),
+    ('〉', "Right Angle Bracket", '>'),
+    ('》', "Right Double Angle Bracket", '>'),
+    ('＞', "Fullwidth Greater-Than Sign", '>'),
+];
+
+// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
+// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
+// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
+// fancier error recovery to it, as there will be less overall work to do this way.
+const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
+    (' ', "Space", Some(token::Whitespace)),
+    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
+    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+    (',', "Comma", Some(token::Comma)),
+    (';', "Semicolon", Some(token::Semi)),
+    (':', "Colon", Some(token::Colon)),
+    ('!', "Exclamation Mark", Some(token::Not)),
+    ('?', "Question Mark", Some(token::Question)),
+    ('.', "Period", Some(token::Dot)),
+    ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
+    (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
+    ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
+    (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
+    ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
+    ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
+    ('*', "Asterisk", Some(token::BinOp(token::Star))),
+    ('/', "Slash", Some(token::BinOp(token::Slash))),
+    ('\\', "Backslash", None),
+    ('&', "Ampersand", Some(token::BinOp(token::And))),
+    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
+    ('<', "Less-Than Sign", Some(token::Lt)),
+    ('=', "Equals Sign", Some(token::Eq)),
+    ('>', "Greater-Than Sign", Some(token::Gt)),
+    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
+    // spitting the correct token out.
+    ('\'', "Single Quote", None),
+    ('"', "Quotation Mark", None),
+];
+
+crate fn check_for_substitution<'a>(
+    reader: &StringReader<'a>,
+    pos: BytePos,
+    ch: char,
+    err: &mut DiagnosticBuilder<'a>,
+) -> Option<token::TokenKind> {
+    let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
+        Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
+        None => return None,
+    };
+
+    let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8()));
+
+    let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
+        Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
+        None => {
+            let msg = format!("substitution character not found for '{}'", ch);
+            reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
+            return None;
+        }
+    };
+
+    // special help suggestion for "directed" double quotes
+    if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
+        let msg = format!(
+            "Unicode characters '“' (Left Double Quotation Mark) and \
+             '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
+            ascii_char, ascii_name
+        );
+        err.span_suggestion(
+            Span::with_root_ctxt(
+                pos,
+                pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
+            ),
+            &msg,
+            format!("\"{}\"", s),
+            Applicability::MaybeIncorrect,
+        );
+    } else {
+        let msg = format!(
+            "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
+            ch, u_name, ascii_char, ascii_name
+        );
+        err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
+    }
+    token.clone()
+}
+
+/// Extract string if found at current position with given delimiters
+fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> {
+    let mut chars = text.chars();
+    let first_char = chars.next()?;
+    if first_char != from_ch {
+        return None;
+    }
+    let last_char_idx = chars.as_str().find(to_ch)?;
+    Some(&chars.as_str()[..last_char_idx])
+}
diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs
new file mode 100644
index 00000000000..9f507d5319e
--- /dev/null
+++ b/src/librustc_parse/lib.rs
@@ -0,0 +1,423 @@
+//! The main parser interface.
+
+#![feature(crate_visibility_modifier)]
+
+use syntax::ast;
+use syntax::print::pprust;
+use syntax::sess::ParseSess;
+use syntax::token::{self, Nonterminal};
+use syntax::tokenstream::{self, TokenStream, TokenTree};
+
+use errors::{PResult, FatalError, Level, Diagnostic};
+use rustc_data_structures::sync::Lrc;
+use syntax_pos::{Span, SourceFile, FileName};
+
+use std::borrow::Cow;
+use std::path::Path;
+use std::str;
+
+use log::info;
+
+pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments");
+
+#[macro_use]
+pub mod parser;
+use parser::{Parser, emit_unclosed_delims, make_unclosed_delims_error};
+pub mod lexer;
+pub mod validate_attr;
+pub mod error_codes;
+
+#[derive(Clone)]
+pub struct Directory<'a> {
+    pub path: Cow<'a, Path>,
+    pub ownership: DirectoryOwnership,
+}
+
+#[derive(Copy, Clone)]
+pub enum DirectoryOwnership {
+    Owned {
+        // None if `mod.rs`, `Some("foo")` if we're in `foo.rs`.
+        relative: Option<ast::Ident>,
+    },
+    UnownedViaBlock,
+    UnownedViaMod,
+}
+
+// A bunch of utility functions of the form `parse_<thing>_from_<source>`
+// where <thing> includes crate, expr, item, stmt, tts, and one that
+// uses a HOF to parse anything, and <source> includes file and
+// `source_str`.
+
+/// A variant of 'panictry!' that works on a `Vec<Diagnostic>` instead of a single
+/// `DiagnosticBuilder`.
panictry_buffer { + ($handler:expr, $e:expr) => ({ + use std::result::Result::{Ok, Err}; + use errors::FatalError; + match $e { + Ok(e) => e, + Err(errs) => { + for e in errs { + $handler.emit_diagnostic(&e); + } + FatalError.raise() + } + } + }) +} + +pub fn parse_crate_from_file<'a>(input: &Path, sess: &'a ParseSess) -> PResult<'a, ast::Crate> { + let mut parser = new_parser_from_file(sess, input); + parser.parse_crate_mod() +} + +pub fn parse_crate_attrs_from_file<'a>(input: &Path, sess: &'a ParseSess) + -> PResult<'a, Vec> { + let mut parser = new_parser_from_file(sess, input); + parser.parse_inner_attributes() +} + +pub fn parse_crate_from_source_str(name: FileName, source: String, sess: &ParseSess) + -> PResult<'_, ast::Crate> { + new_parser_from_source_str(sess, name, source).parse_crate_mod() +} + +pub fn parse_crate_attrs_from_source_str(name: FileName, source: String, sess: &ParseSess) + -> PResult<'_, Vec> { + new_parser_from_source_str(sess, name, source).parse_inner_attributes() +} + +pub fn parse_stream_from_source_str( + name: FileName, + source: String, + sess: &ParseSess, + override_span: Option, +) -> TokenStream { + let (stream, mut errors) = source_file_to_stream( + sess, + sess.source_map().new_source_file(name, source), + override_span, + ); + emit_unclosed_delims(&mut errors, &sess); + stream +} + +/// Creates a new parser from a source string. +pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) -> Parser<'_> { + panictry_buffer!(&sess.span_diagnostic, maybe_new_parser_from_source_str(sess, name, source)) +} + +/// Creates a new parser from a source string. Returns any buffered errors from lexing the initial +/// token stream. +pub fn maybe_new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) + -> Result, Vec> +{ + let mut parser = maybe_source_file_to_parser(sess, + sess.source_map().new_source_file(name, source))?; + parser.recurse_into_file_modules = false; + Ok(parser) +} + +/// Creates a new parser, handling errors as appropriate if the file doesn't exist. +pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path) -> Parser<'a> { + source_file_to_parser(sess, file_to_source_file(sess, path, None)) +} + +/// Creates a new parser, returning buffered diagnostics if the file doesn't exist, +/// or from lexing the initial token stream. +pub fn maybe_new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path) + -> Result, Vec> { + let file = try_file_to_source_file(sess, path, None).map_err(|db| vec![db])?; + maybe_source_file_to_parser(sess, file) +} + +/// Given a session, a crate config, a path, and a span, add +/// the file at the given path to the `source_map`, and returns a parser. +/// On an error, uses the given span as the source of the problem. +pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess, + path: &Path, + directory_ownership: DirectoryOwnership, + module_name: Option, + sp: Span) -> Parser<'a> { + let mut p = source_file_to_parser(sess, file_to_source_file(sess, path, Some(sp))); + p.directory.ownership = directory_ownership; + p.root_module_name = module_name; + p +} + +/// Given a `source_file` and config, returns a parser. +fn source_file_to_parser(sess: &ParseSess, source_file: Lrc) -> Parser<'_> { + panictry_buffer!(&sess.span_diagnostic, + maybe_source_file_to_parser(sess, source_file)) +} + +/// Given a `source_file` and config, return a parser. Returns any buffered errors from lexing the +/// initial token stream. 
+
+/// Given a `source_file` and config, return a parser. Returns any buffered errors from lexing the
+/// initial token stream.
+fn maybe_source_file_to_parser(
+    sess: &ParseSess,
+    source_file: Lrc<SourceFile>,
+) -> Result<Parser<'_>, Vec<Diagnostic>> {
+    let end_pos = source_file.end_pos;
+    let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?;
+    let mut parser = stream_to_parser(sess, stream, None);
+    parser.unclosed_delims = unclosed_delims;
+    if parser.token == token::Eof && parser.token.span.is_dummy() {
+        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt());
+    }
+
+    Ok(parser)
+}
+
+// Must preserve old name for now, because `quote!` from the *existing*
+// compiler expands into it.
+pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser<'_> {
+    stream_to_parser(sess, tts.into_iter().collect(), crate::MACRO_ARGUMENTS)
+}
+
+
+// Base abstractions
+
+/// Given a session and a path and an optional span (for error reporting),
+/// add the path to the session's source_map and return the new source_file or
+/// error when a file can't be read.
+fn try_file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
+                           -> Result<Lrc<SourceFile>, Diagnostic> {
+    sess.source_map().load_file(path)
+    .map_err(|e| {
+        let msg = format!("couldn't read {}: {}", path.display(), e);
+        let mut diag = Diagnostic::new(Level::Fatal, &msg);
+        if let Some(sp) = spanopt {
+            diag.set_span(sp);
+        }
+        diag
+    })
+}
+
+/// Given a session and a path and an optional span (for error reporting),
+/// adds the path to the session's `source_map` and returns the new `source_file`.
+fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
+                       -> Lrc<SourceFile> {
+    match try_file_to_source_file(sess, path, spanopt) {
+        Ok(source_file) => source_file,
+        Err(d) => {
+            sess.span_diagnostic.emit_diagnostic(&d);
+            FatalError.raise();
+        }
+    }
+}
+
+/// Given a `source_file`, produces a sequence of token trees.
+pub fn source_file_to_stream(
+    sess: &ParseSess,
+    source_file: Lrc<SourceFile>,
+    override_span: Option<Span>,
+) -> (TokenStream, Vec<lexer::UnmatchedBrace>) {
+    panictry_buffer!(&sess.span_diagnostic, maybe_file_to_stream(sess, source_file, override_span))
+}
+
+/// Given a source file, produces a sequence of token trees. Returns any buffered errors from
+/// parsing the token stream.
+pub fn maybe_file_to_stream(
+    sess: &ParseSess,
+    source_file: Lrc<SourceFile>,
+    override_span: Option<Span>,
+) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
+    let srdr = lexer::StringReader::new(sess, source_file, override_span);
+    let (token_trees, unmatched_braces) = srdr.into_token_trees();
+
+    match token_trees {
+        Ok(stream) => Ok((stream, unmatched_braces)),
+        Err(err) => {
+            let mut buffer = Vec::with_capacity(1);
+            err.buffer(&mut buffer);
+            // Not using `emit_unclosed_delims` to use `db.buffer`
+            for unmatched in unmatched_braces {
+                if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
+                    err.buffer(&mut buffer);
+                }
+            }
+            Err(buffer)
+        }
+    }
+}
+
+/// Given a stream and the `ParseSess`, produces a parser.
+pub fn stream_to_parser<'a>(
+    sess: &'a ParseSess,
+    stream: TokenStream,
+    subparser_name: Option<&'static str>,
+) -> Parser<'a> {
+    Parser::new(sess, stream, None, true, false, subparser_name)
+}
+
+/// Given a stream, the `ParseSess` and the base directory, produces a parser.
+///
+/// Use this function when you are creating a parser from the token stream
+/// and also care about the current working directory of the parser (e.g.,
+/// you are trying to resolve modules defined inside a macro invocation).
+///
+/// # Note
+///
+/// The main usage of this function is outside of rustc, for those who use
+/// libsyntax as a library.
Please do not remove this function while refactoring +/// just because it is not used in rustc codebase! +pub fn stream_to_parser_with_base_dir<'a>( + sess: &'a ParseSess, + stream: TokenStream, + base_dir: Directory<'a>, +) -> Parser<'a> { + Parser::new(sess, stream, Some(base_dir), true, false, None) +} + +/// Runs the given subparser `f` on the tokens of the given `attr`'s item. +pub fn parse_in_attr<'a, T>( + sess: &'a ParseSess, + attr: &ast::Attribute, + mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, +) -> PResult<'a, T> { + let mut parser = Parser::new( + sess, + attr.get_normal_item().tokens.clone(), + None, + false, + false, + Some("attribute"), + ); + let result = f(&mut parser)?; + if parser.token != token::Eof { + parser.unexpected()?; + } + Ok(result) +} + +// NOTE(Centril): The following probably shouldn't be here but it acknowledges the +// fact that architecturally, we are using parsing (read on below to understand why). + +pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream { + // A `Nonterminal` is often a parsed AST item. At this point we now + // need to convert the parsed AST to an actual token stream, e.g. + // un-parse it basically. + // + // Unfortunately there's not really a great way to do that in a + // guaranteed lossless fashion right now. The fallback here is to just + // stringify the AST node and reparse it, but this loses all span + // information. + // + // As a result, some AST nodes are annotated with the token stream they + // came from. Here we attempt to extract these lossless token streams + // before we fall back to the stringification. + let tokens = match *nt { + Nonterminal::NtItem(ref item) => { + prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span) + } + Nonterminal::NtTraitItem(ref item) => { + prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span) + } + Nonterminal::NtImplItem(ref item) => { + prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span) + } + Nonterminal::NtIdent(ident, is_raw) => { + Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into()) + } + Nonterminal::NtLifetime(ident) => { + Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into()) + } + Nonterminal::NtTT(ref tt) => { + Some(tt.clone().into()) + } + _ => None, + }; + + // FIXME(#43081): Avoid this pretty-print + reparse hack + let source = pprust::nonterminal_to_string(nt); + let filename = FileName::macro_expansion_source_code(&source); + let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span)); + + // During early phases of the compiler the AST could get modified + // directly (e.g., attributes added or removed) and the internal cache + // of tokens my not be invalidated or updated. Consequently if the + // "lossless" token stream disagrees with our actual stringification + // (which has historically been much more battle-tested) then we go + // with the lossy stream anyway (losing span information). + // + // Note that the comparison isn't `==` here to avoid comparing spans, + // but it *also* is a "probable" equality which is a pretty weird + // definition. We mostly want to catch actual changes to the AST + // like a `#[cfg]` being processed or some weird `macro_rules!` + // expansion. 
+    //
+    // What we *don't* want to catch is the fact that a user-defined
+    // literal like `0xf` is stringified as `15`, causing the cached token
+    // stream to not be literal `==` token-wise (ignoring spans) to the
+    // token stream we got from stringification.
+    //
+    // Instead the "probably equal" check here is "does each token
+    // recursively have the same discriminant?" We basically don't look at
+    // the token values here and assume that such fine grained token stream
+    // modifications, including adding/removing typically non-semantic
+    // tokens such as extra braces and commas, don't happen.
+    if let Some(tokens) = tokens {
+        if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
+            return tokens
+        }
+        info!("cached tokens found, but they're not \"probably equal\", \
+               going with stringified version");
+    }
+    return tokens_for_real
+}
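A concrete case of the divergence described above, as plain user code (illustrative only, not part of the patch); it restates the `0xf`-versus-`15` point from the comment:

    // The cached stream for the macro argument contains the token `0xf`;
    // stringifying the expression and reparsing it yields `15`, so a literal
    // `==` comparison (even ignoring spans) would spuriously fail, while the
    // "probably equal" discriminant check still succeeds.
    macro_rules! id { ($e:expr) => { $e }; }
    fn main() {
        let _ = id!(0xf);
    }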
+
+fn prepend_attrs(
+    sess: &ParseSess,
+    attrs: &[ast::Attribute],
+    tokens: Option<&tokenstream::TokenStream>,
+    span: syntax_pos::Span
+) -> Option<tokenstream::TokenStream> {
+    let tokens = tokens?;
+    if attrs.len() == 0 {
+        return Some(tokens.clone())
+    }
+    let mut builder = tokenstream::TokenStreamBuilder::new();
+    for attr in attrs {
+        assert_eq!(attr.style, ast::AttrStyle::Outer,
+                   "inner attributes should prevent cached tokens from existing");
+
+        let source = pprust::attribute_to_string(attr);
+        let macro_filename = FileName::macro_expansion_source_code(&source);
+
+        let item = match attr.kind {
+            ast::AttrKind::Normal(ref item) => item,
+            ast::AttrKind::DocComment(_) => {
+                let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+                builder.push(stream);
+                continue
+            }
+        };
+
+        // synthesize # [ $path $tokens ] manually here
+        let mut brackets = tokenstream::TokenStreamBuilder::new();
+
+        // For simple paths, push the identifier directly
+        if item.path.segments.len() == 1 && item.path.segments[0].args.is_none() {
+            let ident = item.path.segments[0].ident;
+            let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
+            brackets.push(tokenstream::TokenTree::token(token, ident.span));
+
+        // ... and for more complicated paths, fall back to a reparse hack that
+        // should eventually be removed.
+        } else {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            brackets.push(stream);
+        }
+
+        brackets.push(item.tokens.clone());
+
+        // The span we list here for `#` and for `[ ... ]` are both wrong in
+        // that it encompasses more than each token, but it hopefully is "good
+        // enough" for now at least.
+        builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
+        let delim_span = tokenstream::DelimSpan::from_single(attr.span);
+        builder.push(tokenstream::TokenTree::Delimited(
+            delim_span, token::DelimToken::Bracket, brackets.build().into()));
+    }
+    builder.push(tokens.clone());
+    Some(builder.build())
+}
diff --git a/src/librustc_parse/parser/attr.rs b/src/librustc_parse/parser/attr.rs
new file mode 100644
index 00000000000..524b551e54c
--- /dev/null
+++ b/src/librustc_parse/parser/attr.rs
@@ -0,0 +1,351 @@
+use super::{SeqSep, Parser, TokenType, PathStyle};
+use syntax::attr;
+use syntax::ast;
+use syntax::util::comments;
+use syntax::token::{self, Nonterminal, DelimToken};
+use syntax::tokenstream::{TokenStream, TokenTree};
+use syntax_pos::{Span, Symbol};
+use errors::PResult;
+
+use log::debug;
+
+#[derive(Debug)]
+enum InnerAttributeParsePolicy<'a> {
+    Permitted,
+    NotPermitted { reason: &'a str, saw_doc_comment: bool, prev_attr_sp: Option<Span> },
+}
+
+const DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG: &str = "an inner attribute is not \
+                                                     permitted in this context";
+
+impl<'a> Parser<'a> {
+    /// Parses attributes that appear before an item.
+    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
+        let mut attrs: Vec<ast::Attribute> = Vec::new();
+        let mut just_parsed_doc_comment = false;
+        loop {
+            debug!("parse_outer_attributes: self.token={:?}", self.token);
+            match self.token.kind {
+                token::Pound => {
+                    let inner_error_reason = if just_parsed_doc_comment {
+                        "an inner attribute is not permitted following an outer doc comment"
+                    } else if !attrs.is_empty() {
+                        "an inner attribute is not permitted following an outer attribute"
+                    } else {
+                        DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG
+                    };
+                    let inner_parse_policy =
+                        InnerAttributeParsePolicy::NotPermitted {
+                            reason: inner_error_reason,
+                            saw_doc_comment: just_parsed_doc_comment,
+                            prev_attr_sp: attrs.last().and_then(|a| Some(a.span))
+                        };
+                    let attr = self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?;
+                    attrs.push(attr);
+                    just_parsed_doc_comment = false;
+                }
+                token::DocComment(s) => {
+                    let attr = self.mk_doc_comment(s);
+                    if attr.style != ast::AttrStyle::Outer {
+                        let mut err = self.fatal("expected outer doc comment");
+                        err.note("inner doc comments like this (starting with \
+                                  `//!` or `/*!`) can only appear before items");
+                        return Err(err);
+                    }
+                    attrs.push(attr);
+                    self.bump();
+                    just_parsed_doc_comment = true;
+                }
+                _ => break,
+            }
+        }
+        Ok(attrs)
+    }
+
+    fn mk_doc_comment(&self, s: Symbol) -> ast::Attribute {
+        let style = comments::doc_comment_style(&s.as_str());
+        attr::mk_doc_comment(style, s, self.token.span)
+    }
+
+    /// Matches `attribute = # ! [ meta_item ]`.
+    ///
+    /// If `permit_inner` is `true`, then a leading `!` indicates an inner
+    /// attribute.
+    pub fn parse_attribute(&mut self, permit_inner: bool) -> PResult<'a, ast::Attribute> {
+        debug!("parse_attribute: permit_inner={:?} self.token={:?}",
+               permit_inner,
+               self.token);
+        let inner_parse_policy = if permit_inner {
+            InnerAttributeParsePolicy::Permitted
+        } else {
+            InnerAttributeParsePolicy::NotPermitted {
+                reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG,
+                saw_doc_comment: false,
+                prev_attr_sp: None
+            }
+        };
+        self.parse_attribute_with_inner_parse_policy(inner_parse_policy)
+    }
+
+    /// The same as `parse_attribute`, except it takes in an `InnerAttributeParsePolicy`
+    /// that prescribes how to handle inner attributes.
+ fn parse_attribute_with_inner_parse_policy( + &mut self, + inner_parse_policy: InnerAttributeParsePolicy<'_> + ) -> PResult<'a, ast::Attribute> { + debug!("parse_attribute_with_inner_parse_policy: inner_parse_policy={:?} self.token={:?}", + inner_parse_policy, + self.token); + let (span, item, style) = match self.token.kind { + token::Pound => { + let lo = self.token.span; + self.bump(); + + if let InnerAttributeParsePolicy::Permitted = inner_parse_policy { + self.expected_tokens.push(TokenType::Token(token::Not)); + } + + let style = if self.token == token::Not { + self.bump(); + ast::AttrStyle::Inner + } else { + ast::AttrStyle::Outer + }; + + self.expect(&token::OpenDelim(token::Bracket))?; + let item = self.parse_attr_item()?; + self.expect(&token::CloseDelim(token::Bracket))?; + let hi = self.prev_span; + + let attr_sp = lo.to(hi); + + // Emit error if inner attribute is encountered and not permitted + if style == ast::AttrStyle::Inner { + if let InnerAttributeParsePolicy::NotPermitted { reason, + saw_doc_comment, prev_attr_sp } = inner_parse_policy { + let prev_attr_note = if saw_doc_comment { + "previous doc comment" + } else { + "previous outer attribute" + }; + + let mut diagnostic = self + .diagnostic() + .struct_span_err(attr_sp, reason); + + if let Some(prev_attr_sp) = prev_attr_sp { + diagnostic + .span_label(attr_sp, "not permitted following an outer attibute") + .span_label(prev_attr_sp, prev_attr_note); + } + + diagnostic + .note("inner attributes, like `#![no_std]`, annotate the item \ + enclosing them, and are usually found at the beginning of \ + source files. Outer attributes, like `#[test]`, annotate the \ + item following them.") + .emit() + } + } + + (attr_sp, item, style) + } + _ => { + let token_str = self.this_token_to_string(); + return Err(self.fatal(&format!("expected `#`, found `{}`", token_str))); + } + }; + + Ok(attr::mk_attr_from_item(style, item, span)) + } + + /// Parses an inner part of an attribute (the path and following tokens). + /// The tokens must be either a delimited token stream, or empty token stream, + /// or the "legacy" key-value form. + /// PATH `(` TOKEN_STREAM `)` + /// PATH `[` TOKEN_STREAM `]` + /// PATH `{` TOKEN_STREAM `}` + /// PATH + /// PATH `=` UNSUFFIXED_LIT + /// The delimiters or `=` are still put into the resulting token stream. + pub fn parse_attr_item(&mut self) -> PResult<'a, ast::AttrItem> { + let item = match self.token.kind { + token::Interpolated(ref nt) => match **nt { + Nonterminal::NtMeta(ref item) => Some(item.clone()), + _ => None, + }, + _ => None, + }; + Ok(if let Some(item) = item { + self.bump(); + item + } else { + let path = self.parse_path(PathStyle::Mod)?; + let tokens = if self.check(&token::OpenDelim(DelimToken::Paren)) || + self.check(&token::OpenDelim(DelimToken::Bracket)) || + self.check(&token::OpenDelim(DelimToken::Brace)) { + self.parse_token_tree().into() + } else if self.eat(&token::Eq) { + let eq = TokenTree::token(token::Eq, self.prev_span); + let mut is_interpolated_expr = false; + if let token::Interpolated(nt) = &self.token.kind { + if let token::NtExpr(..) = **nt { + is_interpolated_expr = true; + } + } + let token_tree = if is_interpolated_expr { + // We need to accept arbitrary interpolated expressions to continue + // supporting things like `doc = $expr` that work on stable. + // Non-literal interpolated expressions are rejected after expansion. 
+                    self.parse_token_tree()
+                } else {
+                    self.parse_unsuffixed_lit()?.token_tree()
+                };
+                TokenStream::new(vec![eq.into(), token_tree.into()])
+            } else {
+                TokenStream::default()
+            };
+            ast::AttrItem { path, tokens }
+        })
+    }
+
+    /// Parses attributes that appear after the opening of an item. These should
+    /// be preceded by an exclamation mark, but we accept and warn about one
+    /// terminated by a semicolon.
+    ///
+    /// Matches `inner_attrs*`.
+    crate fn parse_inner_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
+        let mut attrs: Vec<ast::Attribute> = vec![];
+        loop {
+            match self.token.kind {
+                token::Pound => {
+                    // Don't even try to parse if it's not an inner attribute.
+                    if !self.look_ahead(1, |t| t == &token::Not) {
+                        break;
+                    }
+
+                    let attr = self.parse_attribute(true)?;
+                    assert_eq!(attr.style, ast::AttrStyle::Inner);
+                    attrs.push(attr);
+                }
+                token::DocComment(s) => {
+                    // We need to get the position of this token before we bump.
+                    let attr = self.mk_doc_comment(s);
+                    if attr.style == ast::AttrStyle::Inner {
+                        attrs.push(attr);
+                        self.bump();
+                    } else {
+                        break;
+                    }
+                }
+                _ => break,
+            }
+        }
+        Ok(attrs)
+    }
+
+    fn parse_unsuffixed_lit(&mut self) -> PResult<'a, ast::Lit> {
+        let lit = self.parse_lit()?;
+        debug!("checking if {:?} is unsuffixed", lit);
+
+        if !lit.kind.is_unsuffixed() {
+            let msg = "suffixed literals are not allowed in attributes";
+            self.diagnostic().struct_span_err(lit.span, msg)
+                .help("instead of using a suffixed literal \
+                       (1u8, 1.0f32, etc.), use an unsuffixed version \
+                       (1, 1.0, etc.).")
+                .emit()
+        }
+
+        Ok(lit)
+    }
+
+    /// Parses `cfg_attr(pred, attr_item_list)` where `attr_item_list` is comma-delimited.
+    pub fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> {
+        self.expect(&token::OpenDelim(token::Paren))?;
+
+        let cfg_predicate = self.parse_meta_item()?;
+        self.expect(&token::Comma)?;
+
+        // Presumably, the majority of the time there will only be one attr.
+        let mut expanded_attrs = Vec::with_capacity(1);
+
+        while !self.check(&token::CloseDelim(token::Paren)) {
+            let lo = self.token.span.lo();
+            let item = self.parse_attr_item()?;
+            expanded_attrs.push((item, self.prev_span.with_lo(lo)));
+            self.expect_one_of(&[token::Comma], &[token::CloseDelim(token::Paren)])?;
+        }
+
+        self.expect(&token::CloseDelim(token::Paren))?;
+        Ok((cfg_predicate, expanded_attrs))
+    }
+
+    /// Matches the following grammar (per RFC 1559).
+    ///
+    ///     meta_item : PATH ( '=' UNSUFFIXED_LIT | '(' meta_item_inner? ')' )? ;
+    ///     meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ;
+    pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> {
+        let nt_meta = match self.token.kind {
+            token::Interpolated(ref nt) => match **nt {
+                token::NtMeta(ref e) => Some(e.clone()),
+                _ => None,
+            },
+            _ => None,
+        };
+
+        if let Some(item) = nt_meta {
+            return match item.meta(item.path.span) {
+                Some(meta) => {
+                    self.bump();
+                    Ok(meta)
+                }
+                None => self.unexpected(),
+            }
+        }
+
+        let lo = self.token.span;
+        let path = self.parse_path(PathStyle::Mod)?;
+        let kind = self.parse_meta_item_kind()?;
+        let span = lo.to(self.prev_span);
+        Ok(ast::MetaItem { path, kind, span })
+    }
+
+    crate fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> {
+        Ok(if self.eat(&token::Eq) {
+            ast::MetaItemKind::NameValue(self.parse_unsuffixed_lit()?)
+        } else if self.eat(&token::OpenDelim(token::Paren)) {
+            ast::MetaItemKind::List(self.parse_meta_seq()?)
+        } else {
+            ast::MetaItemKind::Word
+        })
+    }
+
+    /// Matches `meta_item_inner : (meta_item | UNSUFFIXED_LIT) ;`.
+    fn parse_meta_item_inner(&mut self) -> PResult<'a, ast::NestedMetaItem> {
+        match self.parse_unsuffixed_lit() {
+            Ok(lit) => {
+                return Ok(ast::NestedMetaItem::Literal(lit))
+            }
+            Err(ref mut err) => err.cancel(),
+        }
+
+        match self.parse_meta_item() {
+            Ok(mi) => {
+                return Ok(ast::NestedMetaItem::MetaItem(mi))
+            }
+            Err(ref mut err) => err.cancel(),
+        }
+
+        let found = self.this_token_to_string();
+        let msg = format!("expected unsuffixed literal or identifier, found `{}`", found);
+        Err(self.diagnostic().struct_span_err(self.token.span, &msg))
+    }
+
+    /// Matches `meta_seq = ( COMMASEP(meta_item_inner) )`.
+    fn parse_meta_seq(&mut self) -> PResult<'a, Vec<ast::NestedMetaItem>> {
+        self.parse_seq_to_end(&token::CloseDelim(token::Paren),
+                              SeqSep::trailing_allowed(token::Comma),
+                              |p: &mut Parser<'a>| p.parse_meta_item_inner())
+    }
+}
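For reference, the attribute shapes the grammar above accepts, as a self-contained snippet (the item and attribute names are illustrative only):

    #[derive(Clone)]                     // PATH ( TOKEN_STREAM )
    #[doc = "example"]                   // PATH = UNSUFFIXED_LIT (legacy key-value form)
    #[cfg_attr(test, allow(dead_code))]  // cfg_attr(pred, attr_item_list)
    struct S;

    // An inner attribute, as parsed by `parse_inner_attributes` at the start
    // of an enclosing item or file:
    // #![no_std]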
diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs
new file mode 100644
index 00000000000..38eae008537
--- /dev/null
+++ b/src/librustc_parse/parser/diagnostics.rs
@@ -0,0 +1,1549 @@
+use super::{BlockMode, PathStyle, SemiColonMode, TokenType, TokenExpectType, SeqSep, Parser};
+
+use syntax::ast::{
+    self, Param, BinOpKind, BindingMode, BlockCheckMode, Expr, ExprKind, Ident, Item, ItemKind,
+    Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind,
+};
+use syntax::token::{self, TokenKind, token_can_begin_expr};
+use syntax::print::pprust;
+use syntax::ptr::P;
+use syntax::symbol::{kw, sym};
+use syntax::ThinVec;
+use syntax::util::parser::AssocOp;
+use syntax::struct_span_err;
+
+use errors::{PResult, Applicability, DiagnosticBuilder, DiagnosticId, pluralize};
+use rustc_data_structures::fx::FxHashSet;
+use syntax_pos::{Span, DUMMY_SP, MultiSpan, SpanSnippetError};
+use log::{debug, trace};
+use std::mem;
+
+const TURBOFISH: &'static str = "use `::<...>` instead of `<...>` to specify type arguments";
+
+/// Creates a placeholder argument.
+pub(super) fn dummy_arg(ident: Ident) -> Param {
+    let pat = P(Pat {
+        id: ast::DUMMY_NODE_ID,
+        kind: PatKind::Ident(BindingMode::ByValue(Mutability::Immutable), ident, None),
+        span: ident.span,
+    });
+    let ty = Ty {
+        kind: TyKind::Err,
+        span: ident.span,
+        id: ast::DUMMY_NODE_ID
+    };
+    Param {
+        attrs: ThinVec::default(),
+        id: ast::DUMMY_NODE_ID,
+        pat,
+        span: ident.span,
+        ty: P(ty),
+        is_placeholder: false,
+    }
+}
+
+pub enum Error {
+    FileNotFoundForModule {
+        mod_name: String,
+        default_path: String,
+        secondary_path: String,
+        dir_path: String,
+    },
+    DuplicatePaths {
+        mod_name: String,
+        default_path: String,
+        secondary_path: String,
+    },
+    UselessDocComment,
+    InclusiveRangeWithNoEnd,
+}
+
+impl Error {
+    fn span_err<S: Into<MultiSpan>>(
+        self,
+        sp: S,
+        handler: &errors::Handler,
+    ) -> DiagnosticBuilder<'_> {
+        match self {
+            Error::FileNotFoundForModule {
+                ref mod_name,
+                ref default_path,
+                ref secondary_path,
+                ref dir_path,
+            } => {
+                let mut err = struct_span_err!(
+                    handler,
+                    sp,
+                    E0583,
+                    "file not found for module `{}`",
+                    mod_name,
+                );
+                err.help(&format!(
+                    "name the file either {} or {} inside the directory \"{}\"",
+                    default_path,
+                    secondary_path,
+                    dir_path,
+                ));
+                err
+            }
+            Error::DuplicatePaths { ref mod_name, ref default_path, ref secondary_path } => {
+                let mut err = struct_span_err!(
+                    handler,
+                    sp,
+                    E0584,
+                    "file for module `{}` found at both {} and {}",
+                    mod_name,
+                    default_path,
+                    secondary_path,
+                );
+                err.help("delete or rename one of them to remove the ambiguity");
+                err
+            }
+            Error::UselessDocComment => {
+                let mut err = struct_span_err!(
+                    handler,
+                    sp,
+                    E0585,
+                    "found a documentation comment that doesn't document anything",
+                );
+                err.help("doc comments must come before what they document, maybe a comment was \
+                          intended with `//`?");
+                err
+            }
+            Error::InclusiveRangeWithNoEnd => {
+                let mut err = struct_span_err!(
+                    handler,
+                    sp,
+                    E0586,
+                    "inclusive range with no end",
+                );
+                err.help("inclusive ranges must be bounded at the end (`..=b` or `a..=b`)");
+                err
+            }
+        }
+    }
+}
+
+pub(super) trait RecoverQPath: Sized + 'static {
+    const PATH_STYLE: PathStyle = PathStyle::Expr;
+    fn to_ty(&self) -> Option<P<Ty>>;
+    fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self;
+}
+
+impl RecoverQPath for Ty {
+    const PATH_STYLE: PathStyle = PathStyle::Type;
+    fn to_ty(&self) -> Option<P<Ty>> {
+        Some(P(self.clone()))
+    }
+    fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self {
+        Self {
+            span: path.span,
+            kind: TyKind::Path(qself, path),
+            id: ast::DUMMY_NODE_ID,
+        }
+    }
+}
+
+impl RecoverQPath for Pat {
+    fn to_ty(&self) -> Option<P<Ty>> {
+        self.to_ty()
+    }
+    fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self {
+        Self {
+            span: path.span,
+            kind: PatKind::Path(qself, path),
+            id: ast::DUMMY_NODE_ID,
+        }
+    }
+}
+
+impl RecoverQPath for Expr {
+    fn to_ty(&self) -> Option<P<Ty>> {
+        self.to_ty()
+    }
+    fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self {
+        Self {
+            span: path.span,
+            kind: ExprKind::Path(qself, path),
+            attrs: ThinVec::new(),
+            id: ast::DUMMY_NODE_ID,
+        }
+    }
+}
+
+/// Control whether the closing delimiter should be consumed when calling `Parser::consume_block`.
+crate enum ConsumeClosingDelim {
+    Yes,
+    No,
+}
+
+impl<'a> Parser<'a> {
+    pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
+        self.span_fatal(self.token.span, m)
+    }
+
+    crate fn span_fatal<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> {
+        self.sess.span_diagnostic.struct_span_fatal(sp, m)
+    }
+
+    pub(super) fn span_fatal_err<S: Into<MultiSpan>>(
+        &self,
+        sp: S,
+        err: Error,
+    ) -> DiagnosticBuilder<'a> {
+        err.span_err(sp, self.diagnostic())
+    }
+
+    pub(super) fn bug(&self, m: &str) -> ! {
+        self.sess.span_diagnostic.span_bug(self.token.span, m)
+    }
+
+    pub(super) fn span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) {
+        self.sess.span_diagnostic.span_err(sp, m)
+    }
+
+    pub fn struct_span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> {
+        self.sess.span_diagnostic.struct_span_err(sp, m)
+    }
+
+    pub fn span_bug<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> ! {
+        self.sess.span_diagnostic.span_bug(sp, m)
+    }
+
+    pub(super) fn diagnostic(&self) -> &'a errors::Handler {
+        &self.sess.span_diagnostic
+    }
+
+    pub(super) fn span_to_snippet(&self, span: Span) -> Result<String, SpanSnippetError> {
+        self.sess.source_map().span_to_snippet(span)
+    }
+
+    pub(super) fn expected_ident_found(&self) -> DiagnosticBuilder<'a> {
+        let mut err = self.struct_span_err(
+            self.token.span,
+            &format!("expected identifier, found {}", self.this_token_descr()),
+        );
+        if let token::Ident(name, false) = self.token.kind {
+            if Ident::new(name, self.token.span).is_raw_guess() {
+                err.span_suggestion(
+                    self.token.span,
+                    "you can escape reserved keywords to use them as identifiers",
+                    format!("r#{}", name),
+                    Applicability::MaybeIncorrect,
+                );
+            }
+        }
+        if let Some(token_descr) = self.token_descr() {
+            err.span_label(self.token.span, format!("expected identifier, found {}", token_descr));
+        } else {
+            err.span_label(self.token.span, "expected identifier");
+            if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
+                err.span_suggestion(
+                    self.token.span,
+                    "remove this comma",
+                    String::new(),
+                    Applicability::MachineApplicable,
+                );
+            }
+        }
+        err
+    }
+
+    pub(super) fn expected_one_of_not_found(
+        &mut self,
+        edible: &[TokenKind],
+        inedible: &[TokenKind],
+    ) -> PResult<'a, bool /* recovered */> {
+        fn tokens_to_string(tokens: &[TokenType]) -> String {
+            let mut i = tokens.iter();
+            // This might be a sign we need a connect method on `Iterator`.
+            let b = i.next()
+                     .map_or(String::new(), |t| t.to_string());
+            i.enumerate().fold(b, |mut b, (i, a)| {
+                if tokens.len() > 2 && i == tokens.len() - 2 {
+                    b.push_str(", or ");
+                } else if tokens.len() == 2 && i == tokens.len() - 2 {
+                    b.push_str(" or ");
+                } else {
+                    b.push_str(", ");
+                }
+                b.push_str(&a.to_string());
+                b
+            })
+        }
+
+        let mut expected = edible.iter()
+            .map(|x| TokenType::Token(x.clone()))
+            .chain(inedible.iter().map(|x| TokenType::Token(x.clone())))
+            .chain(self.expected_tokens.iter().cloned())
+            .collect::<Vec<_>>();
+        expected.sort_by_cached_key(|x| x.to_string());
+        expected.dedup();
+        let expect = tokens_to_string(&expected[..]);
+        let actual = self.this_token_descr();
+        let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 {
+            let short_expect = if expected.len() > 6 {
+                format!("{} possible tokens", expected.len())
+            } else {
+                expect.clone()
+            };
+            (format!("expected one of {}, found {}", expect, actual),
+             (self.sess.source_map().next_point(self.prev_span),
+              format!("expected one of {}", short_expect)))
+        } else if expected.is_empty() {
+            (format!("unexpected token: {}", actual),
+             (self.prev_span, "unexpected token after this".to_string()))
+        } else {
+            (format!("expected {}, found {}", expect, actual),
+             (self.sess.source_map().next_point(self.prev_span),
+              format!("expected {}", expect)))
+        };
+        self.last_unexpected_token_span = Some(self.token.span);
+        let mut err = self.fatal(&msg_exp);
+        if self.token.is_ident_named(sym::and) {
+            err.span_suggestion_short(
+                self.token.span,
+                "use `&&` instead of `and` for the boolean operator",
+                "&&".to_string(),
+                Applicability::MaybeIncorrect,
+            );
+        }
+        if self.token.is_ident_named(sym::or) {
+            err.span_suggestion_short(
+                self.token.span,
+                "use `||` instead of `or` for the boolean operator",
+                "||".to_string(),
+                Applicability::MaybeIncorrect,
+            );
+        }
+        let sp = if self.token == token::Eof {
+            // This is EOF; don't want to point at the following char, but rather the last token.
+            self.prev_span
+        } else {
+            label_sp
+        };
+        match self.recover_closing_delimiter(&expected.iter().filter_map(|tt| match tt {
+            TokenType::Token(t) => Some(t.clone()),
+            _ => None,
+        }).collect::<Vec<_>>(), err) {
+            Err(e) => err = e,
+            Ok(recovered) => {
+                return Ok(recovered);
+            }
+        }
+
+        let sm = self.sess.source_map();
+        if self.prev_span == DUMMY_SP {
+            // Account for macro context where the previous span might not be
+            // available to avoid incorrect output (#54841).
+ err.span_label(self.token.span, label_exp); + } else if !sm.is_multiline(self.token.span.shrink_to_hi().until(sp.shrink_to_lo())) { + // When the spans are in the same line, it means that the only content between + // them is whitespace, point at the found token in that case: + // + // X | () => { syntax error }; + // | ^^^^^ expected one of 8 possible tokens here + // + // instead of having: + // + // X | () => { syntax error }; + // | -^^^^^ unexpected token + // | | + // | expected one of 8 possible tokens here + err.span_label(self.token.span, label_exp); + } else { + err.span_label(sp, label_exp); + err.span_label(self.token.span, "unexpected token"); + } + self.maybe_annotate_with_ascription(&mut err, false); + Err(err) + } + + pub fn maybe_annotate_with_ascription( + &mut self, + err: &mut DiagnosticBuilder<'_>, + maybe_expected_semicolon: bool, + ) { + if let Some((sp, likely_path)) = self.last_type_ascription.take() { + let sm = self.sess.source_map(); + let next_pos = sm.lookup_char_pos(self.token.span.lo()); + let op_pos = sm.lookup_char_pos(sp.hi()); + + let allow_unstable = self.sess.unstable_features.is_nightly_build(); + + if likely_path { + err.span_suggestion( + sp, + "maybe write a path separator here", + "::".to_string(), + if allow_unstable { + Applicability::MaybeIncorrect + } else { + Applicability::MachineApplicable + }, + ); + } else if op_pos.line != next_pos.line && maybe_expected_semicolon { + err.span_suggestion( + sp, + "try using a semicolon", + ";".to_string(), + Applicability::MaybeIncorrect, + ); + } else if allow_unstable { + err.span_label(sp, "tried to parse a type due to this type ascription"); + } else { + err.span_label(sp, "tried to parse a type due to this"); + } + if allow_unstable { + // Give extra information about type ascription only if it's a nightly compiler. + err.note("`#![feature(type_ascription)]` lets you annotate an expression with a \ + type: `: `"); + err.note("for more information, see \ + https://github.com/rust-lang/rust/issues/23416"); + } + } + } + + /// Eats and discards tokens until one of `kets` is encountered. Respects token trees, + /// passes through any errors encountered. Used for error recovery. + pub(super) fn eat_to_tokens(&mut self, kets: &[&TokenKind]) { + if let Err(ref mut err) = self.parse_seq_to_before_tokens( + kets, + SeqSep::none(), + TokenExpectType::Expect, + |p| Ok(p.parse_token_tree()), + ) { + err.cancel(); + } + } + + /// This function checks if there are trailing angle brackets and produces + /// a diagnostic to suggest removing them. + /// + /// ```ignore (diagnostic) + /// let _ = vec![1, 2, 3].into_iter().collect::>>>(); + /// ^^ help: remove extra angle brackets + /// ``` + pub(super) fn check_trailing_angle_brackets(&mut self, segment: &PathSegment, end: TokenKind) { + // This function is intended to be invoked after parsing a path segment where there are two + // cases: + // + // 1. A specific token is expected after the path segment. + // eg. `x.foo(`, `x.foo::(` (parenthesis - method call), + // `Foo::`, or `Foo::::` (mod sep - continued path). + // 2. No specific token is expected after the path segment. + // eg. `x.foo` (field access) + // + // This function is called after parsing `.foo` and before parsing the token `end` (if + // present). This includes any angle bracket arguments, such as `.foo::` or + // `Foo::`. + + // We only care about trailing angle brackets if we previously parsed angle bracket + // arguments. 
This helps stop us incorrectly suggesting that extra angle brackets be + // removed in this case: + // + // `x.foo >> (3)` (where `x.foo` is a `u32` for example) + // + // This case is particularly tricky as we won't notice it just looking at the tokens - + // it will appear the same (in terms of upcoming tokens) as below (since the `::` will + // have already been parsed): + // + // `x.foo::>>(3)` + let parsed_angle_bracket_args = segment.args + .as_ref() + .map(|args| args.is_angle_bracketed()) + .unwrap_or(false); + + debug!( + "check_trailing_angle_brackets: parsed_angle_bracket_args={:?}", + parsed_angle_bracket_args, + ); + if !parsed_angle_bracket_args { + return; + } + + // Keep the span at the start so we can highlight the sequence of `>` characters to be + // removed. + let lo = self.token.span; + + // We need to look-ahead to see if we have `>` characters without moving the cursor forward + // (since we might have the field access case and the characters we're eating are + // actual operators and not trailing characters - ie `x.foo >> 3`). + let mut position = 0; + + // We can encounter `>` or `>>` tokens in any order, so we need to keep track of how + // many of each (so we can correctly pluralize our error messages) and continue to + // advance. + let mut number_of_shr = 0; + let mut number_of_gt = 0; + while self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + if *t == token::BinOp(token::BinOpToken::Shr) { + number_of_shr += 1; + true + } else if *t == token::Gt { + number_of_gt += 1; + true + } else { + false + } + }) { + position += 1; + } + + // If we didn't find any trailing `>` characters, then we have nothing to error about. + debug!( + "check_trailing_angle_brackets: number_of_gt={:?} number_of_shr={:?}", + number_of_gt, number_of_shr, + ); + if number_of_gt < 1 && number_of_shr < 1 { + return; + } + + // Finally, double check that we have our end token as otherwise this is the + // second case. + if self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + *t == end + }) { + // Eat from where we started until the end token so that parsing can continue + // as if we didn't have those extra angle brackets. + self.eat_to_tokens(&[&end]); + let span = lo.until(self.token.span); + + let total_num_of_gt = number_of_gt + number_of_shr * 2; + self.diagnostic() + .struct_span_err( + span, + &format!("unmatched angle bracket{}", pluralize!(total_num_of_gt)), + ) + .span_suggestion( + span, + &format!("remove extra angle bracket{}", pluralize!(total_num_of_gt)), + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + /// Produces an error if comparison operators are chained (RFC #558). + /// We only need to check the LHS, not the RHS, because all comparison ops have same + /// precedence (see `fn precedence`) and are left-associative (see `fn fixity`). + /// + /// This can also be hit if someone incorrectly writes `foo()` when they should have used + /// the turbofish (`foo::()`) syntax. We attempt some heuristic recovery if that is the + /// case. 
+    ///
+    /// Keep in mind that given that `outer_op.is_comparison()` holds and comparison ops are left
+    /// associative we can infer that we have:
+    ///
+    ///           outer_op
+    ///           /   \
+    ///     inner_op   r2
+    ///        /  \
+    ///      l1    r1
+    pub(super) fn check_no_chained_comparison(
+        &mut self,
+        lhs: &Expr,
+        outer_op: &AssocOp,
+    ) -> PResult<'a, Option<P<Expr>>> {
+        debug_assert!(
+            outer_op.is_comparison(),
+            "check_no_chained_comparison: {:?} is not comparison",
+            outer_op,
+        );
+
+        let mk_err_expr = |this: &Self, span| {
+            Ok(Some(this.mk_expr(span, ExprKind::Err, ThinVec::new())))
+        };
+
+        match lhs.kind {
+            ExprKind::Binary(op, _, _) if op.node.is_comparison() => {
+                // Respan to include both operators.
+                let op_span = op.span.to(self.prev_span);
+                let mut err = self.struct_span_err(
+                    op_span,
+                    "chained comparison operators require parentheses",
+                );
+
+                let suggest = |err: &mut DiagnosticBuilder<'_>| {
+                    err.span_suggestion_verbose(
+                        op_span.shrink_to_lo(),
+                        TURBOFISH,
+                        "::".to_string(),
+                        Applicability::MaybeIncorrect,
+                    );
+                };
+
+                if op.node == BinOpKind::Lt &&
+                    *outer_op == AssocOp::Less ||  // Include `<` to provide this recommendation
+                    *outer_op == AssocOp::Greater  // even in a case like the following:
+                {                                  //     Foo<Bar<Baz<Qux, ()>>>
+                    if *outer_op == AssocOp::Less {
+                        let snapshot = self.clone();
+                        self.bump();
+                        // So far we have parsed `foo<bar>(` or `foo< bar >::`, so we rewind the
+                        // parser and bail out.
+                        mem::replace(self, snapshot.clone());
+                    }
+                    return if token::ModSep == self.token.kind {
+                        // We have some certainty that this was a bad turbofish at this point.
+                        // `foo< bar >::`
+                        suggest(&mut err);
+
+                        let snapshot = self.clone();
+                        self.bump(); // `::`
+
+                        // Consume the rest of the likely `foo<bar>::new()` or return at `foo`.
+                        match self.parse_expr() {
+                            Ok(_) => {
+                                // 99% certain that the suggestion is correct, continue parsing.
+                                err.emit();
+                                // FIXME: actually check that the two expressions in the binop are
+                                // paths and resynthesize new fn call expression instead of using
+                                // `ExprKind::Err` placeholder.
+                                mk_err_expr(self, lhs.span.to(self.prev_span))
+                            }
+                            Err(mut expr_err) => {
+                                expr_err.cancel();
+                                // Not entirely sure now, but we bubble the error up with the
+                                // suggestion.
+                                mem::replace(self, snapshot);
+                                Err(err)
+                            }
+                        }
+                    } else if token::OpenDelim(token::Paren) == self.token.kind {
+                        // We have high certainty that this was a bad turbofish at this point.
+                        // `foo< bar >(`
+                        suggest(&mut err);
+                        // Consume the fn call arguments.
+                        match self.consume_fn_args() {
+                            Err(()) => Err(err),
+                            Ok(()) => {
+                                err.emit();
+                                // FIXME: actually check that the two expressions in the binop are
+                                // paths and resynthesize new fn call expression instead of using
+                                // `ExprKind::Err` placeholder.
+                                mk_err_expr(self, lhs.span.to(self.prev_span))
+                            }
+                        }
+                    } else {
+                        // All we know is that this is `foo < bar >` and *nothing* else. Try to
+                        // be helpful, but don't attempt to recover.
+                        err.help(TURBOFISH);
+                        err.help("or use `(...)` if you meant to specify fn arguments");
+                        // These cases cause too many knock-down errors, bail out (#61329).
+                        Err(err)
+                    };
+                }
+                err.emit();
+            }
+            _ => {}
+        }
+        Ok(None)
+    }
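The two source shapes this recovery distinguishes, written out as the user code the parser sees (illustrative input only; both snippets are deliberately rejected, which is the point):

    // A genuine chained comparison: parsed as `(a < b) < c` and reported as
    // "chained comparison operators require parentheses".
    fn chained(a: i32, b: i32, c: i32) -> bool {
        a < b < c
    }

    // A missing turbofish: `collect<Vec<i32>>()` initially parses as nested
    // comparisons, and the parser recovers by suggesting
    // `collect::<Vec<i32>>()`.
    fn missing_turbofish() {
        let _v = (0..10).collect<Vec<i32>>();
    }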
+            mem::replace(self, snapshot);
+            Err(())
+        } else {
+            // 99% certain that the suggestion is correct, continue parsing.
+            Ok(())
+        }
+    }
+
+    pub(super) fn maybe_report_ambiguous_plus(
+        &mut self,
+        allow_plus: bool,
+        impl_dyn_multi: bool,
+        ty: &Ty,
+    ) {
+        if !allow_plus && impl_dyn_multi {
+            let sum_with_parens = format!("({})", pprust::ty_to_string(&ty));
+            self.struct_span_err(ty.span, "ambiguous `+` in a type")
+                .span_suggestion(
+                    ty.span,
+                    "use parentheses to disambiguate",
+                    sum_with_parens,
+                    Applicability::MachineApplicable,
+                )
+                .emit();
+        }
+    }
+
+    pub(super) fn maybe_recover_from_bad_type_plus(
+        &mut self,
+        allow_plus: bool,
+        ty: &Ty,
+    ) -> PResult<'a, ()> {
+        // Do not add `+` to expected tokens.
+        if !allow_plus || !self.token.is_like_plus() {
+            return Ok(());
+        }
+
+        self.bump(); // `+`
+        let bounds = self.parse_generic_bounds(None)?;
+        let sum_span = ty.span.to(self.prev_span);
+
+        let mut err = struct_span_err!(
+            self.sess.span_diagnostic,
+            sum_span,
+            E0178,
+            "expected a path on the left-hand side of `+`, not `{}`",
+            pprust::ty_to_string(ty)
+        );
+
+        match ty.kind {
+            TyKind::Rptr(ref lifetime, ref mut_ty) => {
+                let sum_with_parens = pprust::to_string(|s| {
+                    s.s.word("&");
+                    s.print_opt_lifetime(lifetime);
+                    s.print_mutability(mut_ty.mutbl);
+                    s.popen();
+                    s.print_type(&mut_ty.ty);
+                    s.print_type_bounds(" +", &bounds);
+                    s.pclose()
+                });
+                err.span_suggestion(
+                    sum_span,
+                    "try adding parentheses",
+                    sum_with_parens,
+                    Applicability::MachineApplicable,
+                );
+            }
+            TyKind::Ptr(..) | TyKind::BareFn(..) => {
+                err.span_label(sum_span, "perhaps you forgot parentheses?");
+            }
+            _ => {
+                err.span_label(sum_span, "expected a path");
+            }
+        }
+        err.emit();
+        Ok(())
+    }
+
+    /// Tries to recover from associated item paths like `[T]::AssocItem` / `(T, U)::AssocItem`.
+    /// Attempts to convert the base expression/pattern/type into a type, parses the `::AssocItem`
+    /// tail, and combines them into a `<Ty>::AssocItem` expression/pattern/type.
+    pub(super) fn maybe_recover_from_bad_qpath<T: RecoverQPath>(
+        &mut self,
+        base: P<T>,
+        allow_recovery: bool,
+    ) -> PResult<'a, P<T>> {
+        // Do not add `::` to expected tokens.
+        if allow_recovery && self.token == token::ModSep {
+            if let Some(ty) = base.to_ty() {
+                return self.maybe_recover_from_bad_qpath_stage_2(ty.span, ty);
+            }
+        }
+        Ok(base)
+    }
+
+    /// Given an already parsed `Ty`, parses the `::AssocItem` tail and
+    /// combines them into a `<Ty>::AssocItem` expression/pattern/type.
+    pub(super) fn maybe_recover_from_bad_qpath_stage_2<T: RecoverQPath>(
+        &mut self,
+        ty_span: Span,
+        ty: P<Ty>,
+    ) -> PResult<'a, P<T>> {
+        self.expect(&token::ModSep)?;
+
+        let mut path = ast::Path {
+            segments: Vec::new(),
+            span: DUMMY_SP,
+        };
+        self.parse_path_segments(&mut path.segments, T::PATH_STYLE)?;
+        path.span = ty_span.to(self.prev_span);
+
+        let ty_str = self
+            .span_to_snippet(ty_span)
+            .unwrap_or_else(|_| pprust::ty_to_string(&ty));
+        self.diagnostic()
+            .struct_span_err(path.span, "missing angle brackets in associated item path")
+            .span_suggestion(
+                // This is a best-effort recovery.
+                path.span,
+                "try",
+                format!("<{}>::{}", ty_str, pprust::path_to_string(&path)),
+                Applicability::MaybeIncorrect,
+            )
+            .emit();
+
+        let path_span = ty_span.shrink_to_hi(); // Use an empty path since `position == 0`.
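+        // E.g. `[u8]::AssocItem` was suggested as `<[u8]>::AssocItem` above; the node we
+        // synthesize below is the equivalent qualified path with the parsed `ty` as self type.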
+ Ok(P(T::recovered( + Some(QSelf { + ty, + path_span, + position: 0, + }), + path, + ))) + } + + pub(super) fn maybe_consume_incorrect_semicolon(&mut self, items: &[P]) -> bool { + if self.eat(&token::Semi) { + let mut err = self.struct_span_err(self.prev_span, "expected item, found `;`"); + err.span_suggestion_short( + self.prev_span, + "remove this semicolon", + String::new(), + Applicability::MachineApplicable, + ); + if !items.is_empty() { + let previous_item = &items[items.len() - 1]; + let previous_item_kind_name = match previous_item.kind { + // Say "braced struct" because tuple-structs and + // braceless-empty-struct declarations do take a semicolon. + ItemKind::Struct(..) => Some("braced struct"), + ItemKind::Enum(..) => Some("enum"), + ItemKind::Trait(..) => Some("trait"), + ItemKind::Union(..) => Some("union"), + _ => None, + }; + if let Some(name) = previous_item_kind_name { + err.help(&format!( + "{} declarations are not followed by a semicolon", + name + )); + } + } + err.emit(); + true + } else { + false + } + } + + /// Creates a `DiagnosticBuilder` for an unexpected token `t` and tries to recover if it is a + /// closing delimiter. + pub(super) fn unexpected_try_recover( + &mut self, + t: &TokenKind, + ) -> PResult<'a, bool /* recovered */> { + let token_str = pprust::token_kind_to_string(t); + let this_token_str = self.this_token_descr(); + let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) { + // Point at the end of the macro call when reaching end of macro arguments. + (token::Eof, Some(_)) => { + let sp = self.sess.source_map().next_point(self.token.span); + (sp, sp) + } + // We don't want to point at the following span after DUMMY_SP. + // This happens when the parser finds an empty TokenStream. + _ if self.prev_span == DUMMY_SP => (self.token.span, self.token.span), + // EOF, don't want to point at the following char, but rather the last token. + (token::Eof, None) => (self.prev_span, self.token.span), + _ => (self.sess.source_map().next_point(self.prev_span), self.token.span), + }; + let msg = format!( + "expected `{}`, found {}", + token_str, + match (&self.token.kind, self.subparser_name) { + (token::Eof, Some(origin)) => format!("end of {}", origin), + _ => this_token_str, + }, + ); + let mut err = self.struct_span_err(sp, &msg); + let label_exp = format!("expected `{}`", token_str); + match self.recover_closing_delimiter(&[t.clone()], err) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + let sm = self.sess.source_map(); + if !sm.is_multiline(prev_sp.until(sp)) { + // When the spans are in the same line, it means that the only content + // between them is whitespace, point only at the found token. + err.span_label(sp, label_exp); + } else { + err.span_label(prev_sp, label_exp); + err.span_label(sp, "unexpected token"); + } + Err(err) + } + + pub(super) fn expect_semi(&mut self) -> PResult<'a, ()> { + if self.eat(&token::Semi) { + return Ok(()); + } + let sm = self.sess.source_map(); + let msg = format!("expected `;`, found `{}`", self.this_token_descr()); + let appl = Applicability::MachineApplicable; + if self.token.span == DUMMY_SP || self.prev_span == DUMMY_SP { + // Likely inside a macro, can't provide meaninful suggestions. + return self.expect(&token::Semi).map(|_| ()); + } else if !sm.is_multiline(self.prev_span.until(self.token.span)) { + // The current token is in the same line as the prior token, not recoverable. 
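+            // (E.g. `let x = 3 let y = 4;` written on a single line; we fall through to the
+            // unconditional `expect` error at the bottom of this method.)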
+        } else if self.look_ahead(1, |t| t == &token::CloseDelim(token::Brace)
+            || token_can_begin_expr(t) && t.kind != token::Colon
+        ) && [token::Comma, token::Colon].contains(&self.token.kind) {
+            // Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is
+            // either `,` or `:`, and the next token could either start a new statement or is a
+            // block close. For example:
+            //
+            //   let x = 32:
+            //   let y = 42;
+            self.bump();
+            let sp = self.prev_span;
+            self.struct_span_err(sp, &msg)
+                .span_suggestion(sp, "change this to `;`", ";".to_string(), appl)
+                .emit();
+            return Ok(())
+        } else if self.look_ahead(0, |t| t == &token::CloseDelim(token::Brace) || (
+            token_can_begin_expr(t)
+            && t != &token::Semi
+            && t != &token::Pound // Avoid triggering with too many trailing `#` in raw string.
+        )) {
+            // Missing semicolon typo. This is triggered if the next token could either start a
+            // new statement or is a block close. For example:
+            //
+            //   let x = 32
+            //   let y = 42;
+            let sp = self.prev_span.shrink_to_hi();
+            self.struct_span_err(sp, &msg)
+                .span_label(self.token.span, "unexpected token")
+                .span_suggestion_short(sp, "add `;` here", ";".to_string(), appl)
+                .emit();
+            return Ok(())
+        }
+        self.expect(&token::Semi).map(|_| ()) // Error unconditionally
+    }
+
+    pub(super) fn parse_semi_or_incorrect_foreign_fn_body(
+        &mut self,
+        ident: &Ident,
+        extern_sp: Span,
+    ) -> PResult<'a, ()> {
+        if self.token != token::Semi {
+            // This might be an incorrect fn definition (#62109).
+            let parser_snapshot = self.clone();
+            match self.parse_inner_attrs_and_block() {
+                Ok((_, body)) => {
+                    self.struct_span_err(ident.span, "incorrect `fn` inside `extern` block")
+                        .span_label(ident.span, "can't have a body")
+                        .span_label(body.span, "this body is invalid here")
+                        .span_label(
+                            extern_sp,
+                            "`extern` blocks define existing foreign functions and `fn`s \
+                            inside of them cannot have a body")
+                        .help("you might have meant to write a function accessible through ffi, \
+                               which can be done by writing `extern fn` outside of the \
+                               `extern` block")
+                        .note("for more information, visit \
+                               https://doc.rust-lang.org/std/keyword.extern.html")
+                        .emit();
+                }
+                Err(mut err) => {
+                    err.cancel();
+                    mem::replace(self, parser_snapshot);
+                    self.expect_semi()?;
+                }
+            }
+        } else {
+            self.bump();
+        }
+        Ok(())
+    }
+
+    /// Consumes alternative await syntaxes like `await!(<expr>)`, `await <expr>`,
+    /// `await? <expr>`, `await(<expr>)`, and `await { <expr> }`.
+    pub(super) fn parse_incorrect_await_syntax(
+        &mut self,
+        lo: Span,
+        await_sp: Span,
+    ) -> PResult<'a, (Span, ExprKind)> {
+        if self.token == token::Not {
+            // Handle `await!(<expr>)`.
+            self.expect(&token::Not)?;
+            self.expect(&token::OpenDelim(token::Paren))?;
+            let expr = self.parse_expr()?;
+            self.expect(&token::CloseDelim(token::Paren))?;
+            let sp = self.error_on_incorrect_await(lo, self.prev_span, &expr, false);
+            return Ok((sp, ExprKind::Await(expr)))
+        }
+
+        let is_question = self.eat(&token::Question); // Handle `await? <expr>`.
+        let expr = if self.token == token::OpenDelim(token::Brace) {
+            // Handle `await { <expr> }`.
+            // This needs to be handled separately from the next arm to avoid
+            // interpreting `await { <expr> }?` as `<expr>?.await`.
+            self.parse_block_expr(
+                None,
+                self.token.span,
+                BlockCheckMode::Default,
+                ThinVec::new(),
+            )
+        } else {
+            self.parse_expr()
+        }.map_err(|mut err| {
+            err.span_label(await_sp, "while parsing this incorrect await expression");
+            err
+        })?;
+        let sp = self.error_on_incorrect_await(lo, expr.span, &expr, is_question);
+        Ok((sp, ExprKind::Await(expr)))
+    }
+
+    fn error_on_incorrect_await(&self, lo: Span, hi: Span, expr: &Expr, is_question: bool) -> Span {
+        let expr_str = self.span_to_snippet(expr.span)
+            .unwrap_or_else(|_| pprust::expr_to_string(&expr));
+        let suggestion = format!("{}.await{}", expr_str, if is_question { "?" } else { "" });
+        let sp = lo.to(hi);
+        let app = match expr.kind {
+            ExprKind::Try(_) => Applicability::MaybeIncorrect, // `await <expr>?`
+            _ => Applicability::MachineApplicable,
+        };
+        self.struct_span_err(sp, "incorrect use of `await`")
+            .span_suggestion(sp, "`await` is a postfix operation", suggestion, app)
+            .emit();
+        sp
+    }
+
+    /// If encountering `future.await()`, consumes and emits an error.
+    pub(super) fn recover_from_await_method_call(&mut self) {
+        if self.token == token::OpenDelim(token::Paren) &&
+            self.look_ahead(1, |t| t == &token::CloseDelim(token::Paren))
+        {
+            // future.await()
+            let lo = self.token.span;
+            self.bump(); // (
+            let sp = lo.to(self.token.span);
+            self.bump(); // )
+            self.struct_span_err(sp, "incorrect use of `await`")
+                .span_suggestion(
+                    sp,
+                    "`await` is not a method call, remove the parentheses",
+                    String::new(),
+                    Applicability::MachineApplicable,
+                ).emit()
+        }
+    }
+
+    /// Recovers a situation like `for ( $pat in $expr )`
+    /// and suggests writing `for $pat in $expr` instead.
+    ///
+    /// This should be called before parsing the `$block`.
+    pub(super) fn recover_parens_around_for_head(
+        &mut self,
+        pat: P<Pat>,
+        expr: &Expr,
+        begin_paren: Option<Span>,
+    ) -> P<Pat> {
+        match (&self.token.kind, begin_paren) {
+            (token::CloseDelim(token::Paren), Some(begin_par_sp)) => {
+                self.bump();
+
+                let pat_str = self
+                    // Remove the `(` from the span of the pattern:
+                    .span_to_snippet(pat.span.trim_start(begin_par_sp).unwrap())
+                    .unwrap_or_else(|_| pprust::pat_to_string(&pat));
+
+                self.struct_span_err(self.prev_span, "unexpected closing `)`")
+                    .span_label(begin_par_sp, "opening `(`")
+                    .span_suggestion(
+                        begin_par_sp.to(self.prev_span),
+                        "remove parenthesis in `for` loop",
+                        format!("{} in {}", pat_str, pprust::expr_to_string(&expr)),
+                        // With e.g. `for (x) in y)` this would replace `(x) in y)`
+                        // with `x) in y)` which is syntactically invalid.
+                        // However, this is prevented before we get here.
+                        Applicability::MachineApplicable,
+                    )
+                    .emit();
+
+                // Unwrap `(pat)` into `pat` to avoid the `unused_parens` lint.
+                pat.and_then(|pat| match pat.kind {
+                    PatKind::Paren(pat) => pat,
+                    _ => P(pat),
+                })
+            }
+            _ => pat,
+        }
+    }
+
+    pub(super) fn could_ascription_be_path(&self, node: &ast::ExprKind) -> bool {
+        (self.token == token::Lt &&  // `foo:<bar`, likely a typoed turbofish.
+            self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident())
+        ) ||
+            self.token.is_ident() &&
+            match node {
+                // `foo::` → `foo:` or `foo.bar::` → `foo.bar:`
+                ast::ExprKind::Path(..) | ast::ExprKind::Field(..) => true,
+                _ => false,
+            } &&
+            !self.token.is_reserved_ident() &&            // v `foo:bar(baz)`
+            self.look_ahead(1, |t| t == &token::OpenDelim(token::Paren)) ||
+            self.look_ahead(1, |t| t == &token::Lt) &&    // `foo:bar<baz`
+            self.look_ahead(2, |t| t.is_ident()) ||
+            self.look_ahead(1, |t| t == &token::Colon) && // `foo:bar:baz`
+            self.look_ahead(2, |t| t.is_ident()) ||
+            self.look_ahead(1, |t| t == &token::ModSep) &&
+            (self.look_ahead(2, |t| t.is_ident()) ||      // `foo:bar::baz`
+             self.look_ahead(2, |t| t == &token::Lt))     // `foo:bar::<baz>`
+    }
+
+    pub(super) fn recover_seq_parse_error(
+        &mut self,
+        delim: token::DelimToken,
+        lo: Span,
+        result: PResult<'a, P<Expr>>,
+    ) -> P<Expr> {
+        match result {
+            Ok(x) => x,
+            Err(mut err) => {
+                err.emit();
+                // Recover from parse error, callers expect the closing delim to be consumed.
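+                // E.g. for a call like `foo(1, !)`, the remaining tokens are skipped up to the
+                // closing `)` and the whole sequence becomes a single `ExprKind::Err` expression.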
+ self.consume_block(delim, ConsumeClosingDelim::Yes); + self.mk_expr(lo.to(self.prev_span), ExprKind::Err, ThinVec::new()) + } + } + } + + pub(super) fn recover_closing_delimiter( + &mut self, + tokens: &[TokenKind], + mut err: DiagnosticBuilder<'a>, + ) -> PResult<'a, bool> { + let mut pos = None; + // We want to use the last closing delim that would apply. + for (i, unmatched) in self.unclosed_delims.iter().enumerate().rev() { + if tokens.contains(&token::CloseDelim(unmatched.expected_delim)) + && Some(self.token.span) > unmatched.unclosed_span + { + pos = Some(i); + } + } + match pos { + Some(pos) => { + // Recover and assume that the detected unclosed delimiter was meant for + // this location. Emit the diagnostic and act as if the delimiter was + // present for the parser's sake. + + // Don't attempt to recover from this unclosed delimiter more than once. + let unmatched = self.unclosed_delims.remove(pos); + let delim = TokenType::Token(token::CloseDelim(unmatched.expected_delim)); + if unmatched.found_delim.is_none() { + // We encountered `Eof`, set this fact here to avoid complaining about missing + // `fn main()` when we found place to suggest the closing brace. + *self.sess.reached_eof.borrow_mut() = true; + } + + // We want to suggest the inclusion of the closing delimiter where it makes + // the most sense, which is immediately after the last token: + // + // {foo(bar {}} + // - ^ + // | | + // | help: `)` may belong here + // | + // unclosed delimiter + if let Some(sp) = unmatched.unclosed_span { + err.span_label(sp, "unclosed delimiter"); + } + err.span_suggestion_short( + self.sess.source_map().next_point(self.prev_span), + &format!("{} may belong here", delim.to_string()), + delim.to_string(), + Applicability::MaybeIncorrect, + ); + if unmatched.found_delim.is_none() { + // Encountered `Eof` when lexing blocks. Do not recover here to avoid knockdown + // errors which would be emitted elsewhere in the parser and let other error + // recovery consume the rest of the file. + Err(err) + } else { + err.emit(); + self.expected_tokens.clear(); // Reduce the number of errors. + Ok(true) + } + } + _ => Err(err), + } + } + + /// Recovers from `pub` keyword in places where it seems _reasonable_ but isn't valid. + pub(super) fn eat_bad_pub(&mut self) { + // When `unclosed_delims` is populated, it means that the code being parsed is already + // quite malformed, which might mean that, for example, a pub struct definition could be + // parsed as being a trait item, which is invalid and this error would trigger + // unconditionally, resulting in misleading diagnostics. Because of this, we only attempt + // this nice to have recovery for code that is otherwise well formed. + if self.token.is_keyword(kw::Pub) && self.unclosed_delims.is_empty() { + match self.parse_visibility(false) { + Ok(vis) => { + self.diagnostic() + .struct_span_err(vis.span, "unnecessary visibility qualifier") + .span_label(vis.span, "`pub` not permitted here") + .emit(); + } + Err(mut err) => err.emit(), + } + } + } + + /// Eats tokens until we can be relatively sure we reached the end of the + /// statement. This is something of a best-effort heuristic. + /// + /// We terminate when we find an unmatched `}` (without consuming it). 
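+    ///
+    /// For example, when recovering inside `fn f() { 1 @#~ 2 }`, everything up to (but not
+    /// including) the closing `}` is eaten so parsing can continue after the malformed statement.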
+ pub(super) fn recover_stmt(&mut self) { + self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore) + } + + /// If `break_on_semi` is `Break`, then we will stop consuming tokens after + /// finding (and consuming) a `;` outside of `{}` or `[]` (note that this is + /// approximate -- it can mean we break too early due to macros, but that + /// should only lead to sub-optimal recovery, not inaccurate parsing). + /// + /// If `break_on_block` is `Break`, then we will stop consuming tokens + /// after finding (and consuming) a brace-delimited block. + pub(super) fn recover_stmt_( + &mut self, + break_on_semi: SemiColonMode, + break_on_block: BlockMode, + ) { + let mut brace_depth = 0; + let mut bracket_depth = 0; + let mut in_block = false; + debug!("recover_stmt_ enter loop (semi={:?}, block={:?})", + break_on_semi, break_on_block); + loop { + debug!("recover_stmt_ loop {:?}", self.token); + match self.token.kind { + token::OpenDelim(token::DelimToken::Brace) => { + brace_depth += 1; + self.bump(); + if break_on_block == BlockMode::Break && + brace_depth == 1 && + bracket_depth == 0 { + in_block = true; + } + } + token::OpenDelim(token::DelimToken::Bracket) => { + bracket_depth += 1; + self.bump(); + } + token::CloseDelim(token::DelimToken::Brace) => { + if brace_depth == 0 { + debug!("recover_stmt_ return - close delim {:?}", self.token); + break; + } + brace_depth -= 1; + self.bump(); + if in_block && bracket_depth == 0 && brace_depth == 0 { + debug!("recover_stmt_ return - block end {:?}", self.token); + break; + } + } + token::CloseDelim(token::DelimToken::Bracket) => { + bracket_depth -= 1; + if bracket_depth < 0 { + bracket_depth = 0; + } + self.bump(); + } + token::Eof => { + debug!("recover_stmt_ return - Eof"); + break; + } + token::Semi => { + self.bump(); + if break_on_semi == SemiColonMode::Break && + brace_depth == 0 && + bracket_depth == 0 { + debug!("recover_stmt_ return - Semi"); + break; + } + } + token::Comma if break_on_semi == SemiColonMode::Comma && + brace_depth == 0 && + bracket_depth == 0 => + { + debug!("recover_stmt_ return - Semi"); + break; + } + _ => { + self.bump() + } + } + } + } + + pub(super) fn check_for_for_in_in_typo(&mut self, in_span: Span) { + if self.eat_keyword(kw::In) { + // a common typo: `for _ in in bar {}` + self.struct_span_err(self.prev_span, "expected iterable, found keyword `in`") + .span_suggestion_short( + in_span.until(self.prev_span), + "remove the duplicated `in`", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + pub(super) fn expected_semi_or_open_brace(&mut self) -> PResult<'a, T> { + let token_str = self.this_token_descr(); + let mut err = self.fatal(&format!("expected `;` or `{{`, found {}", token_str)); + err.span_label(self.token.span, "expected `;` or `{`"); + Err(err) + } + + pub(super) fn eat_incorrect_doc_comment_for_param_type(&mut self) { + if let token::DocComment(_) = self.token.kind { + self.struct_span_err( + self.token.span, + "documentation comments cannot be applied to a function parameter's type", + ) + .span_label(self.token.span, "doc comments are not allowed here") + .emit(); + self.bump(); + } else if self.token == token::Pound && self.look_ahead(1, |t| { + *t == token::OpenDelim(token::Bracket) + }) { + let lo = self.token.span; + // Skip every token until next possible arg. 
+ while self.token != token::CloseDelim(token::Bracket) { + self.bump(); + } + let sp = lo.to(self.token.span); + self.bump(); + self.struct_span_err( + sp, + "attributes cannot be applied to a function parameter's type", + ) + .span_label(sp, "attributes are not allowed here") + .emit(); + } + } + + pub(super) fn parameter_without_type( + &mut self, + err: &mut DiagnosticBuilder<'_>, + pat: P, + require_name: bool, + is_self_allowed: bool, + is_trait_item: bool, + ) -> Option { + // If we find a pattern followed by an identifier, it could be an (incorrect) + // C-style parameter declaration. + if self.check_ident() && self.look_ahead(1, |t| { + *t == token::Comma || *t == token::CloseDelim(token::Paren) + }) { // `fn foo(String s) {}` + let ident = self.parse_ident().unwrap(); + let span = pat.span.with_hi(ident.span.hi()); + + err.span_suggestion( + span, + "declare the type after the parameter binding", + String::from(": "), + Applicability::HasPlaceholders, + ); + return Some(ident); + } else if let PatKind::Ident(_, ident, _) = pat.kind { + if require_name && ( + is_trait_item || + self.token == token::Comma || + self.token == token::Lt || + self.token == token::CloseDelim(token::Paren) + ) { // `fn foo(a, b) {}`, `fn foo(a, b) {}` or `fn foo(usize, usize) {}` + if is_self_allowed { + err.span_suggestion( + pat.span, + "if this is a `self` type, give it a parameter name", + format!("self: {}", ident), + Applicability::MaybeIncorrect, + ); + } + // Avoid suggesting that `fn foo(HashMap)` is fixed with a change to + // `fn foo(HashMap: TypeName)`. + if self.token != token::Lt { + err.span_suggestion( + pat.span, + "if this was a parameter name, give it a type", + format!("{}: TypeName", ident), + Applicability::HasPlaceholders, + ); + } + err.span_suggestion( + pat.span, + "if this is a type, explicitly ignore the parameter name", + format!("_: {}", ident), + Applicability::MachineApplicable, + ); + err.note("anonymous parameters are removed in the 2018 edition (see RFC 1685)"); + + // Don't attempt to recover by using the `X` in `X` as the parameter name. + return if self.token == token::Lt { None } else { Some(ident) }; + } + } + None + } + + pub(super) fn recover_arg_parse(&mut self) -> PResult<'a, (P, P)> { + let pat = self.parse_pat(Some("argument name"))?; + self.expect(&token::Colon)?; + let ty = self.parse_ty()?; + + self.diagnostic() + .struct_span_err_with_code( + pat.span, + "patterns aren't allowed in methods without bodies", + DiagnosticId::Error("E0642".into()), + ) + .span_suggestion_short( + pat.span, + "give this argument a name or use an underscore to ignore it", + "_".to_owned(), + Applicability::MachineApplicable, + ) + .emit(); + + // Pretend the pattern is `_`, to avoid duplicate errors from AST validation. 
+ let pat = P(Pat { + kind: PatKind::Wild, + span: pat.span, + id: ast::DUMMY_NODE_ID + }); + Ok((pat, ty)) + } + + pub(super) fn recover_bad_self_param( + &mut self, + mut param: ast::Param, + is_trait_item: bool, + ) -> PResult<'a, ast::Param> { + let sp = param.pat.span; + param.ty.kind = TyKind::Err; + let mut err = self.struct_span_err(sp, "unexpected `self` parameter in function"); + if is_trait_item { + err.span_label(sp, "must be the first associated function parameter"); + } else { + err.span_label(sp, "not valid as function parameter"); + err.note("`self` is only valid as the first parameter of an associated function"); + } + err.emit(); + Ok(param) + } + + pub(super) fn consume_block( + &mut self, + delim: token::DelimToken, + consume_close: ConsumeClosingDelim, + ) { + let mut brace_depth = 0; + loop { + if self.eat(&token::OpenDelim(delim)) { + brace_depth += 1; + } else if self.check(&token::CloseDelim(delim)) { + if brace_depth == 0 { + if let ConsumeClosingDelim::Yes = consume_close { + // Some of the callers of this method expect to be able to parse the + // closing delimiter themselves, so we leave it alone. Otherwise we advance + // the parser. + self.bump(); + } + return; + } else { + self.bump(); + brace_depth -= 1; + continue; + } + } else if self.token == token::Eof || self.eat(&token::CloseDelim(token::NoDelim)) { + return; + } else { + self.bump(); + } + } + } + + pub(super) fn expected_expression_found(&self) -> DiagnosticBuilder<'a> { + let (span, msg) = match (&self.token.kind, self.subparser_name) { + (&token::Eof, Some(origin)) => { + let sp = self.sess.source_map().next_point(self.token.span); + (sp, format!("expected expression, found end of {}", origin)) + } + _ => (self.token.span, format!( + "expected expression, found {}", + self.this_token_descr(), + )), + }; + let mut err = self.struct_span_err(span, &msg); + let sp = self.sess.source_map().start_point(self.token.span); + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) { + self.sess.expr_parentheses_needed(&mut err, *sp, None); + } + err.span_label(span, "expected expression"); + err + } + + fn consume_tts( + &mut self, + mut acc: i64, // `i64` because malformed code can have more closing delims than opening. + // Not using `FxHashMap` due to `token::TokenKind: !Eq + !Hash`. + modifier: &[(token::TokenKind, i64)], + ) { + while acc > 0 { + if let Some((_, val)) = modifier.iter().find(|(t, _)| *t == self.token.kind) { + acc += *val; + } + if self.token.kind == token::Eof { + break; + } + self.bump(); + } + } + + /// Replace duplicated recovered parameters with `_` pattern to avoid unecessary errors. + /// + /// This is necessary because at this point we don't know whether we parsed a function with + /// anonymous parameters or a function with names but no types. In order to minimize + /// unecessary errors, we assume the parameters are in the shape of `fn foo(a, b, c)` where + /// the parameters are *names* (so we don't emit errors about not being able to find `b` in + /// the local scope), but if we find the same name multiple times, like in `fn foo(i8, i8)`, + /// we deduplicate them to not complain about duplicated parameter names. 
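+    ///
+    /// E.g. for a recovered `fn foo(i8, i8)`, the second `i8` binding's pattern becomes `_`,
+    /// so no extra error about the duplicated parameter name is emitted for it.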
+    pub(super) fn deduplicate_recovered_params_names(&self, fn_inputs: &mut Vec<Param>) {
+        let mut seen_inputs = FxHashSet::default();
+        for input in fn_inputs.iter_mut() {
+            let opt_ident = if let (PatKind::Ident(_, ident, _), TyKind::Err) = (
+                &input.pat.kind, &input.ty.kind,
+            ) {
+                Some(*ident)
+            } else {
+                None
+            };
+            if let Some(ident) = opt_ident {
+                if seen_inputs.contains(&ident) {
+                    input.pat.kind = PatKind::Wild;
+                }
+                seen_inputs.insert(ident);
+            }
+        }
+    }
+}
diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs
new file mode 100644
index 00000000000..dadb91f8b3c
--- /dev/null
+++ b/src/librustc_parse/parser/expr.rs
@@ -0,0 +1,1963 @@
+use super::{Parser, Restrictions, PrevTokenKind, TokenType, PathStyle, BlockMode};
+use super::{SemiColonMode, SeqSep, TokenExpectType};
+use super::pat::{GateOr, PARAM_EXPECTED};
+use super::diagnostics::Error;
+use crate::maybe_recover_from_interpolated_ty_qpath;
+
+use syntax::ast::{
+    self, DUMMY_NODE_ID, Attribute, AttrStyle, Ident, CaptureBy, BlockCheckMode,
+    Expr, ExprKind, RangeLimits, Label, Movability, IsAsync, Arm, Ty, TyKind,
+    FunctionRetTy, Param, FnDecl, BinOpKind, BinOp, UnOp, Mac, AnonConst, Field, Lit,
+};
+use syntax::token::{self, Token, TokenKind};
+use syntax::print::pprust;
+use syntax::ptr::P;
+use syntax::source_map::{self, Span};
+use syntax::util::classify;
+use syntax::util::literal::LitError;
+use syntax::util::parser::{AssocOp, Fixity, prec_let_scrutinee_needs_par};
+use syntax_pos::symbol::{kw, sym};
+use syntax_pos::Symbol;
+use errors::{PResult, Applicability};
+use std::mem;
+use rustc_data_structures::thin_vec::ThinVec;
+
+/// Possibly accepts a `token::Interpolated` expression (a pre-parsed expression
+/// dropped into the token stream, which happens while parsing the result of
+/// macro expansion). Placement of these is not as complex as I feared it would
+/// be. The important thing is to make sure that lookahead doesn't balk at
+/// `token::Interpolated` tokens.
+macro_rules! maybe_whole_expr {
+    ($p:expr) => {
+        if let token::Interpolated(nt) = &$p.token.kind {
+            match &**nt {
+                token::NtExpr(e) | token::NtLiteral(e) => {
+                    let e = e.clone();
+                    $p.bump();
+                    return Ok(e);
+                }
+                token::NtPath(path) => {
+                    let path = path.clone();
+                    $p.bump();
+                    return Ok($p.mk_expr(
+                        $p.token.span, ExprKind::Path(None, path), ThinVec::new()
+                    ));
+                }
+                token::NtBlock(block) => {
+                    let block = block.clone();
+                    $p.bump();
+                    return Ok($p.mk_expr(
+                        $p.token.span, ExprKind::Block(block, None), ThinVec::new()
+                    ));
+                }
+                // N.B., `NtIdent(ident)` is normalized to `Ident` in `fn bump`.
+                _ => {},
+            };
+        }
+    }
+}
+
+#[derive(Debug)]
+pub(super) enum LhsExpr {
+    NotYetParsed,
+    AttributesParsed(ThinVec<Attribute>),
+    AlreadyParsed(P<Expr>),
+}
+
+impl From<Option<ThinVec<Attribute>>> for LhsExpr {
+    /// Converts `Some(attrs)` into `LhsExpr::AttributesParsed(attrs)`
+    /// and `None` into `LhsExpr::NotYetParsed`.
+    ///
+    /// This conversion does not allocate.
+    fn from(o: Option<ThinVec<Attribute>>) -> Self {
+        if let Some(attrs) = o {
+            LhsExpr::AttributesParsed(attrs)
+        } else {
+            LhsExpr::NotYetParsed
+        }
+    }
+}
+
+impl From<P<Expr>> for LhsExpr {
+    /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed(expr)`.
+    ///
+    /// This conversion does not allocate.
+    fn from(expr: P<Expr>) -> Self {
+        LhsExpr::AlreadyParsed(expr)
+    }
+}
+
+impl<'a> Parser<'a> {
+    /// Parses an expression.
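+    /// This is the public entry point; it simply delegates to `parse_expr_res` with an empty
+    /// restriction set and no pre-parsed attributes.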
+ #[inline] + pub fn parse_expr(&mut self) -> PResult<'a, P> { + self.parse_expr_res(Restrictions::empty(), None) + } + + fn parse_paren_expr_seq(&mut self) -> PResult<'a, Vec>> { + self.parse_paren_comma_seq(|p| { + match p.parse_expr() { + Ok(expr) => Ok(expr), + Err(mut err) => match p.token.kind { + token::Ident(name, false) + if name == kw::Underscore && p.look_ahead(1, |t| { + t == &token::Comma + }) => { + // Special-case handling of `foo(_, _, _)` + err.emit(); + let sp = p.token.span; + p.bump(); + Ok(p.mk_expr(sp, ExprKind::Err, ThinVec::new())) + } + _ => Err(err), + }, + } + }).map(|(r, _)| r) + } + + /// Parses an expression, subject to the given restrictions. + #[inline] + pub(super) fn parse_expr_res( + &mut self, + r: Restrictions, + already_parsed_attrs: Option> + ) -> PResult<'a, P> { + self.with_res(r, |this| this.parse_assoc_expr(already_parsed_attrs)) + } + + /// Parses an associative expression. + /// + /// This parses an expression accounting for associativity and precedence of the operators in + /// the expression. + #[inline] + fn parse_assoc_expr( + &mut self, + already_parsed_attrs: Option>, + ) -> PResult<'a, P> { + self.parse_assoc_expr_with(0, already_parsed_attrs.into()) + } + + /// Parses an associative expression with operators of at least `min_prec` precedence. + pub(super) fn parse_assoc_expr_with( + &mut self, + min_prec: usize, + lhs: LhsExpr, + ) -> PResult<'a, P> { + let mut lhs = if let LhsExpr::AlreadyParsed(expr) = lhs { + expr + } else { + let attrs = match lhs { + LhsExpr::AttributesParsed(attrs) => Some(attrs), + _ => None, + }; + if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind) { + return self.parse_prefix_range_expr(attrs); + } else { + self.parse_prefix_expr(attrs)? + } + }; + let last_type_ascription_set = self.last_type_ascription.is_some(); + + match (self.expr_is_complete(&lhs), AssocOp::from_token(&self.token)) { + (true, None) => { + self.last_type_ascription = None; + // Semi-statement forms are odd. See https://github.com/rust-lang/rust/issues/29071 + return Ok(lhs); + } + (false, _) => {} // continue parsing the expression + // An exhaustive check is done in the following block, but these are checked first + // because they *are* ambiguous but also reasonable looking incorrect syntax, so we + // want to keep their span info to improve diagnostics in these cases in a later stage. + (true, Some(AssocOp::Multiply)) | // `{ 42 } *foo = bar;` or `{ 42 } * 3` + (true, Some(AssocOp::Subtract)) | // `{ 42 } -5` + (true, Some(AssocOp::LAnd)) | // `{ 42 } &&x` (#61475) + (true, Some(AssocOp::Add)) // `{ 42 } + 42 + // If the next token is a keyword, then the tokens above *are* unambiguously incorrect: + // `if x { a } else { b } && if y { c } else { d }` + if !self.look_ahead(1, |t| t.is_reserved_ident()) => { + self.last_type_ascription = None; + // These cases are ambiguous and can't be identified in the parser alone + let sp = self.sess.source_map().start_point(self.token.span); + self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span); + return Ok(lhs); + } + (true, Some(ref op)) if !op.can_continue_expr_unambiguously() => { + self.last_type_ascription = None; + return Ok(lhs); + } + (true, Some(_)) => { + // We've found an expression that would be parsed as a statement, but the next + // token implies this should be parsed as an expression. 
+ // For example: `if let Some(x) = x { x } else { 0 } / 2` + let mut err = self.struct_span_err(self.token.span, &format!( + "expected expression, found `{}`", + pprust::token_to_string(&self.token), + )); + err.span_label(self.token.span, "expected expression"); + self.sess.expr_parentheses_needed( + &mut err, + lhs.span, + Some(pprust::expr_to_string(&lhs), + )); + err.emit(); + } + } + self.expected_tokens.push(TokenType::Operator); + while let Some(op) = AssocOp::from_token(&self.token) { + + // Adjust the span for interpolated LHS to point to the `$lhs` token and not to what + // it refers to. Interpolated identifiers are unwrapped early and never show up here + // as `PrevTokenKind::Interpolated` so if LHS is a single identifier we always process + // it as "interpolated", it doesn't change the answer for non-interpolated idents. + let lhs_span = match (self.prev_token_kind, &lhs.kind) { + (PrevTokenKind::Interpolated, _) => self.prev_span, + (PrevTokenKind::Ident, &ExprKind::Path(None, ref path)) + if path.segments.len() == 1 => self.prev_span, + _ => lhs.span, + }; + + let cur_op_span = self.token.span; + let restrictions = if op.is_assign_like() { + self.restrictions & Restrictions::NO_STRUCT_LITERAL + } else { + self.restrictions + }; + let prec = op.precedence(); + if prec < min_prec { + break; + } + // Check for deprecated `...` syntax + if self.token == token::DotDotDot && op == AssocOp::DotDotEq { + self.err_dotdotdot_syntax(self.token.span); + } + + if self.token == token::LArrow { + self.err_larrow_operator(self.token.span); + } + + self.bump(); + if op.is_comparison() { + if let Some(expr) = self.check_no_chained_comparison(&lhs, &op)? { + return Ok(expr); + } + } + // Special cases: + if op == AssocOp::As { + lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Cast)?; + continue + } else if op == AssocOp::Colon { + let maybe_path = self.could_ascription_be_path(&lhs.kind); + self.last_type_ascription = Some((self.prev_span, maybe_path)); + + lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Type)?; + self.sess.gated_spans.gate(sym::type_ascription, lhs.span); + continue + } else if op == AssocOp::DotDot || op == AssocOp::DotDotEq { + // If we didn’t have to handle `x..`/`x..=`, it would be pretty easy to + // generalise it to the Fixity::None code. + // + // We have 2 alternatives here: `x..y`/`x..=y` and `x..`/`x..=` The other + // two variants are handled with `parse_prefix_range_expr` call above. + let rhs = if self.is_at_start_of_range_notation_rhs() { + Some(self.parse_assoc_expr_with(prec + 1, LhsExpr::NotYetParsed)?) + } else { + None + }; + let (lhs_span, rhs_span) = (lhs.span, if let Some(ref x) = rhs { + x.span + } else { + cur_op_span + }); + let limits = if op == AssocOp::DotDot { + RangeLimits::HalfOpen + } else { + RangeLimits::Closed + }; + + let r = self.mk_range(Some(lhs), rhs, limits)?; + lhs = self.mk_expr(lhs_span.to(rhs_span), r, ThinVec::new()); + break + } + + let fixity = op.fixity(); + let prec_adjustment = match fixity { + Fixity::Right => 0, + Fixity::Left => 1, + // We currently have no non-associative operators that are not handled above by + // the special cases. The code is here only for future convenience. + Fixity::None => 1, + }; + let rhs = self.with_res( + restrictions - Restrictions::STMT_EXPR, + |this| this.parse_assoc_expr_with(prec + prec_adjustment, LhsExpr::NotYetParsed) + )?; + + // Make sure that the span of the parent node is larger than the span of lhs and rhs, + // including the attributes. 
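+            // E.g. for `#[attr] a + b`, the span of the sum must start at the `#`, so we take
+            // the first outer attribute's span when there is one.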
+ let lhs_span = lhs + .attrs + .iter() + .filter(|a| a.style == AttrStyle::Outer) + .next() + .map_or(lhs_span, |a| a.span); + let span = lhs_span.to(rhs.span); + lhs = match op { + AssocOp::Add | AssocOp::Subtract | AssocOp::Multiply | AssocOp::Divide | + AssocOp::Modulus | AssocOp::LAnd | AssocOp::LOr | AssocOp::BitXor | + AssocOp::BitAnd | AssocOp::BitOr | AssocOp::ShiftLeft | AssocOp::ShiftRight | + AssocOp::Equal | AssocOp::Less | AssocOp::LessEqual | AssocOp::NotEqual | + AssocOp::Greater | AssocOp::GreaterEqual => { + let ast_op = op.to_ast_binop().unwrap(); + let binary = self.mk_binary(source_map::respan(cur_op_span, ast_op), lhs, rhs); + self.mk_expr(span, binary, ThinVec::new()) + } + AssocOp::Assign => self.mk_expr(span, ExprKind::Assign(lhs, rhs), ThinVec::new()), + AssocOp::AssignOp(k) => { + let aop = match k { + token::Plus => BinOpKind::Add, + token::Minus => BinOpKind::Sub, + token::Star => BinOpKind::Mul, + token::Slash => BinOpKind::Div, + token::Percent => BinOpKind::Rem, + token::Caret => BinOpKind::BitXor, + token::And => BinOpKind::BitAnd, + token::Or => BinOpKind::BitOr, + token::Shl => BinOpKind::Shl, + token::Shr => BinOpKind::Shr, + }; + let aopexpr = self.mk_assign_op(source_map::respan(cur_op_span, aop), lhs, rhs); + self.mk_expr(span, aopexpr, ThinVec::new()) + } + AssocOp::As | AssocOp::Colon | AssocOp::DotDot | AssocOp::DotDotEq => { + self.bug("AssocOp should have been handled by special case") + } + }; + + if let Fixity::None = fixity { break } + } + if last_type_ascription_set { + self.last_type_ascription = None; + } + Ok(lhs) + } + + /// Checks if this expression is a successfully parsed statement. + fn expr_is_complete(&self, e: &Expr) -> bool { + self.restrictions.contains(Restrictions::STMT_EXPR) && + !classify::expr_requires_semi_to_be_stmt(e) + } + + fn is_at_start_of_range_notation_rhs(&self) -> bool { + if self.token.can_begin_expr() { + // Parse `for i in 1.. { }` as infinite loop, not as `for i in (1..{})`. + if self.token == token::OpenDelim(token::Brace) { + return !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL); + } + true + } else { + false + } + } + + /// Parses prefix-forms of range notation: `..expr`, `..`, `..=expr`. + fn parse_prefix_range_expr( + &mut self, + already_parsed_attrs: Option> + ) -> PResult<'a, P> { + // Check for deprecated `...` syntax. + if self.token == token::DotDotDot { + self.err_dotdotdot_syntax(self.token.span); + } + + debug_assert!([token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind), + "parse_prefix_range_expr: token {:?} is not DotDot/DotDotEq", + self.token); + let tok = self.token.clone(); + let attrs = self.parse_or_use_outer_attributes(already_parsed_attrs)?; + let lo = self.token.span; + let mut hi = self.token.span; + self.bump(); + let opt_end = if self.is_at_start_of_range_notation_rhs() { + // RHS must be parsed with more associativity than the dots. + let next_prec = AssocOp::from_token(&tok).unwrap().precedence() + 1; + Some(self.parse_assoc_expr_with(next_prec, LhsExpr::NotYetParsed) + .map(|x| { + hi = x.span; + x + })?) + } else { + None + }; + let limits = if tok == token::DotDot { + RangeLimits::HalfOpen + } else { + RangeLimits::Closed + }; + + let r = self.mk_range(None, opt_end, limits)?; + Ok(self.mk_expr(lo.to(hi), r, attrs)) + } + + /// Parses a prefix-unary-operator expr. 
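+    /// E.g. `!done`, `-x`, `*ptr`, `&mut y` and `box v`, plus recovery for `~x` and `not x`.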
+ fn parse_prefix_expr( + &mut self, + already_parsed_attrs: Option> + ) -> PResult<'a, P> { + let attrs = self.parse_or_use_outer_attributes(already_parsed_attrs)?; + let lo = self.token.span; + // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() + let (hi, ex) = match self.token.kind { + token::Not => { + self.bump(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + (lo.to(span), self.mk_unary(UnOp::Not, e)) + } + // Suggest `!` for bitwise negation when encountering a `~` + token::Tilde => { + self.bump(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + let span_of_tilde = lo; + self.struct_span_err(span_of_tilde, "`~` cannot be used as a unary operator") + .span_suggestion_short( + span_of_tilde, + "use `!` to perform bitwise not", + "!".to_owned(), + Applicability::MachineApplicable + ) + .emit(); + (lo.to(span), self.mk_unary(UnOp::Not, e)) + } + token::BinOp(token::Minus) => { + self.bump(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + (lo.to(span), self.mk_unary(UnOp::Neg, e)) + } + token::BinOp(token::Star) => { + self.bump(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + (lo.to(span), self.mk_unary(UnOp::Deref, e)) + } + token::BinOp(token::And) | token::AndAnd => { + self.expect_and()?; + let m = self.parse_mutability(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + (lo.to(span), ExprKind::AddrOf(m, e)) + } + token::Ident(..) if self.token.is_keyword(kw::Box) => { + self.bump(); + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + let span = lo.to(span); + self.sess.gated_spans.gate(sym::box_syntax, span); + (span, ExprKind::Box(e)) + } + token::Ident(..) if self.token.is_ident_named(sym::not) => { + // `not` is just an ordinary identifier in Rust-the-language, + // but as `rustc`-the-compiler, we can issue clever diagnostics + // for confused users who really want to say `!` + let token_cannot_continue_expr = |t: &Token| match t.kind { + // These tokens can start an expression after `!`, but + // can't continue an expression after an ident + token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw), + token::Literal(..) | token::Pound => true, + _ => t.is_whole_expr(), + }; + let cannot_continue_expr = self.look_ahead(1, token_cannot_continue_expr); + if cannot_continue_expr { + self.bump(); + // Emit the error ... + self.struct_span_err( + self.token.span, + &format!("unexpected {} after identifier",self.this_token_descr()) + ) + .span_suggestion_short( + // Span the `not` plus trailing whitespace to avoid + // trailing whitespace after the `!` in our suggestion + self.sess.source_map() + .span_until_non_whitespace(lo.to(self.token.span)), + "use `!` to perform logical negation", + "!".to_owned(), + Applicability::MachineApplicable + ) + .emit(); + // —and recover! 
(just as if we were in the block + // for the `token::Not` arm) + let e = self.parse_prefix_expr(None); + let (span, e) = self.interpolated_or_expr_span(e)?; + (lo.to(span), self.mk_unary(UnOp::Not, e)) + } else { + return self.parse_dot_or_call_expr(Some(attrs)); + } + } + _ => { return self.parse_dot_or_call_expr(Some(attrs)); } + }; + return Ok(self.mk_expr(lo.to(hi), ex, attrs)); + } + + /// Returns the span of expr, if it was not interpolated or the span of the interpolated token. + fn interpolated_or_expr_span( + &self, + expr: PResult<'a, P>, + ) -> PResult<'a, (Span, P)> { + expr.map(|e| { + if self.prev_token_kind == PrevTokenKind::Interpolated { + (self.prev_span, e) + } else { + (e.span, e) + } + }) + } + + fn parse_assoc_op_cast(&mut self, lhs: P, lhs_span: Span, + expr_kind: fn(P, P) -> ExprKind) + -> PResult<'a, P> { + let mk_expr = |this: &mut Self, rhs: P| { + this.mk_expr(lhs_span.to(rhs.span), expr_kind(lhs, rhs), ThinVec::new()) + }; + + // Save the state of the parser before parsing type normally, in case there is a + // LessThan comparison after this cast. + let parser_snapshot_before_type = self.clone(); + match self.parse_ty_no_plus() { + Ok(rhs) => { + Ok(mk_expr(self, rhs)) + } + Err(mut type_err) => { + // Rewind to before attempting to parse the type with generics, to recover + // from situations like `x as usize < y` in which we first tried to parse + // `usize < y` as a type with generic arguments. + let parser_snapshot_after_type = self.clone(); + mem::replace(self, parser_snapshot_before_type); + + match self.parse_path(PathStyle::Expr) { + Ok(path) => { + let (op_noun, op_verb) = match self.token.kind { + token::Lt => ("comparison", "comparing"), + token::BinOp(token::Shl) => ("shift", "shifting"), + _ => { + // We can end up here even without `<` being the next token, for + // example because `parse_ty_no_plus` returns `Err` on keywords, + // but `parse_path` returns `Ok` on them due to error recovery. + // Return original error and parser state. + mem::replace(self, parser_snapshot_after_type); + return Err(type_err); + } + }; + + // Successfully parsed the type path leaving a `<` yet to parse. + type_err.cancel(); + + // Report non-fatal diagnostics, keep `x as usize` as an expression + // in AST and continue parsing. + let msg = format!( + "`<` is interpreted as a start of generic arguments for `{}`, not a {}", + pprust::path_to_string(&path), + op_noun, + ); + let span_after_type = parser_snapshot_after_type.token.span; + let expr = mk_expr(self, P(Ty { + span: path.span, + kind: TyKind::Path(None, path), + id: DUMMY_NODE_ID, + })); + + let expr_str = self.span_to_snippet(expr.span) + .unwrap_or_else(|_| pprust::expr_to_string(&expr)); + + self.struct_span_err(self.token.span, &msg) + .span_label( + self.look_ahead(1, |t| t.span).to(span_after_type), + "interpreted as generic arguments" + ) + .span_label(self.token.span, format!("not interpreted as {}", op_noun)) + .span_suggestion( + expr.span, + &format!("try {} the cast value", op_verb), + format!("({})", expr_str), + Applicability::MachineApplicable, + ) + .emit(); + + Ok(expr) + } + Err(mut path_err) => { + // Couldn't parse as a path, return original error and parser state. + path_err.cancel(); + mem::replace(self, parser_snapshot_after_type); + Err(type_err) + } + } + } + } + } + + /// Parses `a.b` or `a(13)` or `a[4]` or just `a`. 
+ fn parse_dot_or_call_expr( + &mut self, + already_parsed_attrs: Option>, + ) -> PResult<'a, P> { + let attrs = self.parse_or_use_outer_attributes(already_parsed_attrs)?; + + let b = self.parse_bottom_expr(); + let (span, b) = self.interpolated_or_expr_span(b)?; + self.parse_dot_or_call_expr_with(b, span, attrs) + } + + pub(super) fn parse_dot_or_call_expr_with( + &mut self, + e0: P, + lo: Span, + mut attrs: ThinVec, + ) -> PResult<'a, P> { + // Stitch the list of outer attributes onto the return value. + // A little bit ugly, but the best way given the current code + // structure + self.parse_dot_or_call_expr_with_(e0, lo).map(|expr| + expr.map(|mut expr| { + attrs.extend::>(expr.attrs.into()); + expr.attrs = attrs; + match expr.kind { + ExprKind::If(..) if !expr.attrs.is_empty() => { + // Just point to the first attribute in there... + let span = expr.attrs[0].span; + self.span_err(span, "attributes are not yet allowed on `if` expressions"); + } + _ => {} + } + expr + }) + ) + } + + fn parse_dot_or_call_expr_with_(&mut self, e0: P, lo: Span) -> PResult<'a, P> { + let mut e = e0; + let mut hi; + loop { + // expr? + while self.eat(&token::Question) { + let hi = self.prev_span; + e = self.mk_expr(lo.to(hi), ExprKind::Try(e), ThinVec::new()); + } + + // expr.f + if self.eat(&token::Dot) { + match self.token.kind { + token::Ident(..) => { + e = self.parse_dot_suffix(e, lo)?; + } + token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { + let span = self.token.span; + self.bump(); + let field = ExprKind::Field(e, Ident::new(symbol, span)); + e = self.mk_expr(lo.to(span), field, ThinVec::new()); + + self.expect_no_suffix(span, "a tuple index", suffix); + } + token::Literal(token::Lit { kind: token::Float, symbol, .. }) => { + self.bump(); + let fstr = symbol.as_str(); + let msg = format!("unexpected token: `{}`", symbol); + let mut err = self.diagnostic().struct_span_err(self.prev_span, &msg); + err.span_label(self.prev_span, "unexpected token"); + if fstr.chars().all(|x| "0123456789.".contains(x)) { + let float = match fstr.parse::().ok() { + Some(f) => f, + None => continue, + }; + let sugg = pprust::to_string(|s| { + s.popen(); + s.print_expr(&e); + s.s.word( "."); + s.print_usize(float.trunc() as usize); + s.pclose(); + s.s.word("."); + s.s.word(fstr.splitn(2, ".").last().unwrap().to_string()) + }); + err.span_suggestion( + lo.to(self.prev_span), + "try parenthesizing the first index", + sugg, + Applicability::MachineApplicable + ); + } + return Err(err); + + } + _ => { + // FIXME Could factor this out into non_fatal_unexpected or something. + let actual = self.this_token_to_string(); + self.span_err(self.token.span, &format!("unexpected token: `{}`", actual)); + } + } + continue; + } + if self.expr_is_complete(&e) { break; } + match self.token.kind { + // expr(...) + token::OpenDelim(token::Paren) => { + let seq = self.parse_paren_expr_seq().map(|es| { + let nd = self.mk_call(e, es); + let hi = self.prev_span; + self.mk_expr(lo.to(hi), nd, ThinVec::new()) + }); + e = self.recover_seq_parse_error(token::Paren, lo, seq); + } + + // expr[...] + // Could be either an index expression or a slicing expression. 
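+                // E.g. `arr[0]` (indexing) or `arr[1..3]` (slicing, where the "index" is just
+                // a range expression).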
+ token::OpenDelim(token::Bracket) => { + self.bump(); + let ix = self.parse_expr()?; + hi = self.token.span; + self.expect(&token::CloseDelim(token::Bracket))?; + let index = self.mk_index(e, ix); + e = self.mk_expr(lo.to(hi), index, ThinVec::new()) + } + _ => return Ok(e) + } + } + return Ok(e); + } + + /// Assuming we have just parsed `.`, continue parsing into an expression. + fn parse_dot_suffix(&mut self, self_arg: P, lo: Span) -> PResult<'a, P> { + if self.token.span.rust_2018() && self.eat_keyword(kw::Await) { + return self.mk_await_expr(self_arg, lo); + } + + let segment = self.parse_path_segment(PathStyle::Expr)?; + self.check_trailing_angle_brackets(&segment, token::OpenDelim(token::Paren)); + + Ok(match self.token.kind { + token::OpenDelim(token::Paren) => { + // Method call `expr.f()` + let mut args = self.parse_paren_expr_seq()?; + args.insert(0, self_arg); + + let span = lo.to(self.prev_span); + self.mk_expr(span, ExprKind::MethodCall(segment, args), ThinVec::new()) + } + _ => { + // Field access `expr.f` + if let Some(args) = segment.args { + self.span_err(args.span(), + "field expressions may not have generic arguments"); + } + + let span = lo.to(self.prev_span); + self.mk_expr(span, ExprKind::Field(self_arg, segment.ident), ThinVec::new()) + } + }) + } + + /// At the bottom (top?) of the precedence hierarchy, + /// Parses things like parenthesized exprs, macros, `return`, etc. + /// + /// N.B., this does not parse outer attributes, and is private because it only works + /// correctly if called from `parse_dot_or_call_expr()`. + fn parse_bottom_expr(&mut self) -> PResult<'a, P> { + maybe_recover_from_interpolated_ty_qpath!(self, true); + maybe_whole_expr!(self); + + // Outer attributes are already parsed and will be + // added to the return value after the fact. + // + // Therefore, prevent sub-parser from parsing + // attributes by giving them a empty "already-parsed" list. + let mut attrs = ThinVec::new(); + + let lo = self.token.span; + let mut hi = self.token.span; + + let ex: ExprKind; + + macro_rules! parse_lit { + () => { + match self.parse_lit() { + Ok(literal) => { + hi = self.prev_span; + ex = ExprKind::Lit(literal); + } + Err(mut err) => { + err.cancel(); + return Err(self.expected_expression_found()); + } + } + } + } + + // Note: when adding new syntax here, don't forget to adjust `TokenKind::can_begin_expr()`. + match self.token.kind { + // This match arm is a special-case of the `_` match arm below and + // could be removed without changing functionality, but it's faster + // to have it here, especially for programs with large constants. + token::Literal(_) => { + parse_lit!() + } + token::OpenDelim(token::Paren) => { + self.bump(); + + attrs.extend(self.parse_inner_attributes()?); + + // `(e)` is parenthesized `e`. + // `(e,)` is a tuple with only one field, `e`. + let mut es = vec![]; + let mut trailing_comma = false; + let mut recovered = false; + while self.token != token::CloseDelim(token::Paren) { + es.push(match self.parse_expr() { + Ok(es) => es, + Err(mut err) => { + // Recover from parse error in tuple list. 
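+                        // E.g. each `_` in a placeholder tuple `(_, _, _)` is consumed and
+                        // replaced with an `ExprKind::Err` below.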
+ match self.token.kind { + token::Ident(name, false) + if name == kw::Underscore && self.look_ahead(1, |t| { + t == &token::Comma + }) => { + // Special-case handling of `Foo<(_, _, _)>` + err.emit(); + let sp = self.token.span; + self.bump(); + self.mk_expr(sp, ExprKind::Err, ThinVec::new()) + } + _ => return Ok( + self.recover_seq_parse_error(token::Paren, lo, Err(err)), + ), + } + } + }); + recovered = self.expect_one_of( + &[], + &[token::Comma, token::CloseDelim(token::Paren)], + )?; + if self.eat(&token::Comma) { + trailing_comma = true; + } else { + trailing_comma = false; + break; + } + } + if !recovered { + self.bump(); + } + + hi = self.prev_span; + ex = if es.len() == 1 && !trailing_comma { + ExprKind::Paren(es.into_iter().nth(0).unwrap()) + } else { + ExprKind::Tup(es) + }; + } + token::OpenDelim(token::Brace) => { + return self.parse_block_expr(None, lo, BlockCheckMode::Default, attrs); + } + token::BinOp(token::Or) | token::OrOr => { + return self.parse_closure_expr(attrs); + } + token::OpenDelim(token::Bracket) => { + self.bump(); + + attrs.extend(self.parse_inner_attributes()?); + + if self.eat(&token::CloseDelim(token::Bracket)) { + // Empty vector + ex = ExprKind::Array(Vec::new()); + } else { + // Non-empty vector + let first_expr = self.parse_expr()?; + if self.eat(&token::Semi) { + // Repeating array syntax: `[ 0; 512 ]` + let count = AnonConst { + id: DUMMY_NODE_ID, + value: self.parse_expr()?, + }; + self.expect(&token::CloseDelim(token::Bracket))?; + ex = ExprKind::Repeat(first_expr, count); + } else if self.eat(&token::Comma) { + // Vector with two or more elements + let remaining_exprs = self.parse_seq_to_end( + &token::CloseDelim(token::Bracket), + SeqSep::trailing_allowed(token::Comma), + |p| Ok(p.parse_expr()?) + )?; + let mut exprs = vec![first_expr]; + exprs.extend(remaining_exprs); + ex = ExprKind::Array(exprs); + } else { + // Vector with one element + self.expect(&token::CloseDelim(token::Bracket))?; + ex = ExprKind::Array(vec![first_expr]); + } + } + hi = self.prev_span; + } + _ => { + if self.eat_lt() { + let (qself, path) = self.parse_qpath(PathStyle::Expr)?; + hi = path.span; + return Ok(self.mk_expr(lo.to(hi), ExprKind::Path(Some(qself), path), attrs)); + } + if self.token.is_path_start() { + let path = self.parse_path(PathStyle::Expr)?; + + // `!`, as an operator, is prefix, so we know this isn't that. 
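+                // E.g. `vec![1, 2]` or `writeln!(w, "hi")` are parsed here as macro calls.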
+ if self.eat(&token::Not) { + // MACRO INVOCATION expression + let (delim, tts) = self.expect_delimited_token_tree()?; + hi = self.prev_span; + ex = ExprKind::Mac(Mac { + path, + tts, + delim, + span: lo.to(hi), + prior_type_ascription: self.last_type_ascription, + }); + } else if self.check(&token::OpenDelim(token::Brace)) { + if let Some(expr) = self.maybe_parse_struct_expr(lo, &path, &attrs) { + return expr; + } else { + hi = path.span; + ex = ExprKind::Path(None, path); + } + } else { + hi = path.span; + ex = ExprKind::Path(None, path); + } + + let expr = self.mk_expr(lo.to(hi), ex, attrs); + return self.maybe_recover_from_bad_qpath(expr, true); + } + if self.check_keyword(kw::Move) || self.check_keyword(kw::Static) { + return self.parse_closure_expr(attrs); + } + if self.eat_keyword(kw::If) { + return self.parse_if_expr(attrs); + } + if self.eat_keyword(kw::For) { + let lo = self.prev_span; + return self.parse_for_expr(None, lo, attrs); + } + if self.eat_keyword(kw::While) { + let lo = self.prev_span; + return self.parse_while_expr(None, lo, attrs); + } + if let Some(label) = self.eat_label() { + let lo = label.ident.span; + self.expect(&token::Colon)?; + if self.eat_keyword(kw::While) { + return self.parse_while_expr(Some(label), lo, attrs) + } + if self.eat_keyword(kw::For) { + return self.parse_for_expr(Some(label), lo, attrs) + } + if self.eat_keyword(kw::Loop) { + return self.parse_loop_expr(Some(label), lo, attrs) + } + if self.token == token::OpenDelim(token::Brace) { + return self.parse_block_expr(Some(label), + lo, + BlockCheckMode::Default, + attrs); + } + let msg = "expected `while`, `for`, `loop` or `{` after a label"; + let mut err = self.fatal(msg); + err.span_label(self.token.span, msg); + return Err(err); + } + if self.eat_keyword(kw::Loop) { + let lo = self.prev_span; + return self.parse_loop_expr(None, lo, attrs); + } + if self.eat_keyword(kw::Continue) { + let label = self.eat_label(); + let ex = ExprKind::Continue(label); + let hi = self.prev_span; + return Ok(self.mk_expr(lo.to(hi), ex, attrs)); + } + if self.eat_keyword(kw::Match) { + let match_sp = self.prev_span; + return self.parse_match_expr(attrs).map_err(|mut err| { + err.span_label(match_sp, "while parsing this match expression"); + err + }); + } + if self.eat_keyword(kw::Unsafe) { + return self.parse_block_expr( + None, + lo, + BlockCheckMode::Unsafe(ast::UserProvided), + attrs); + } + if self.is_do_catch_block() { + let mut db = self.fatal("found removed `do catch` syntax"); + db.help("following RFC #2388, the new non-placeholder syntax is `try`"); + return Err(db); + } + if self.is_try_block() { + let lo = self.token.span; + assert!(self.eat_keyword(kw::Try)); + return self.parse_try_block(lo, attrs); + } + + // `Span::rust_2018()` is somewhat expensive; don't get it repeatedly. + let is_span_rust_2018 = self.token.span.rust_2018(); + if is_span_rust_2018 && self.check_keyword(kw::Async) { + return if self.is_async_block() { // Check for `async {` and `async move {`. + self.parse_async_block(attrs) + } else { + self.parse_closure_expr(attrs) + }; + } + if self.eat_keyword(kw::Return) { + if self.token.can_begin_expr() { + let e = self.parse_expr()?; + hi = e.span; + ex = ExprKind::Ret(Some(e)); + } else { + ex = ExprKind::Ret(None); + } + } else if self.eat_keyword(kw::Break) { + let label = self.eat_label(); + let e = if self.token.can_begin_expr() + && !(self.token == token::OpenDelim(token::Brace) + && self.restrictions.contains( + Restrictions::NO_STRUCT_LITERAL)) { + Some(self.parse_expr()?) 
+ } else { + None + }; + ex = ExprKind::Break(label, e); + hi = self.prev_span; + } else if self.eat_keyword(kw::Yield) { + if self.token.can_begin_expr() { + let e = self.parse_expr()?; + hi = e.span; + ex = ExprKind::Yield(Some(e)); + } else { + ex = ExprKind::Yield(None); + } + + let span = lo.to(hi); + self.sess.gated_spans.gate(sym::generators, span); + } else if self.eat_keyword(kw::Let) { + return self.parse_let_expr(attrs); + } else if is_span_rust_2018 && self.eat_keyword(kw::Await) { + let (await_hi, e_kind) = self.parse_incorrect_await_syntax(lo, self.prev_span)?; + hi = await_hi; + ex = e_kind; + } else { + if !self.unclosed_delims.is_empty() && self.check(&token::Semi) { + // Don't complain about bare semicolons after unclosed braces + // recovery in order to keep the error count down. Fixing the + // delimiters will possibly also fix the bare semicolon found in + // expression context. For example, silence the following error: + // + // error: expected expression, found `;` + // --> file.rs:2:13 + // | + // 2 | foo(bar(; + // | ^ expected expression + self.bump(); + return Ok(self.mk_expr(self.token.span, ExprKind::Err, ThinVec::new())); + } + parse_lit!() + } + } + } + + let expr = self.mk_expr(lo.to(hi), ex, attrs); + self.maybe_recover_from_bad_qpath(expr, true) + } + + /// Matches `lit = true | false | token_lit`. + pub(super) fn parse_lit(&mut self) -> PResult<'a, Lit> { + let mut recovered = None; + if self.token == token::Dot { + // Attempt to recover `.4` as `0.4`. + recovered = self.look_ahead(1, |next_token| { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) + = next_token.kind { + if self.token.span.hi() == next_token.span.lo() { + let s = String::from("0.") + &symbol.as_str(); + let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix); + return Some(Token::new(kind, self.token.span.to(next_token.span))); + } + } + None + }); + if let Some(token) = &recovered { + self.bump(); + self.struct_span_err(token.span, "float literals must have an integer part") + .span_suggestion( + token.span, + "must have an integer part", + pprust::token_to_string(token), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + let token = recovered.as_ref().unwrap_or(&self.token); + match Lit::from_token(token) { + Ok(lit) => { + self.bump(); + Ok(lit) + } + Err(LitError::NotLiteral) => { + let msg = format!("unexpected token: {}", self.this_token_descr()); + Err(self.span_fatal(token.span, &msg)) + } + Err(err) => { + let span = token.span; + let lit = match token.kind { + token::Literal(lit) => lit, + _ => unreachable!(), + }; + self.bump(); + self.error_literal_from_token(err, lit, span); + // Pack possible quotes and prefixes from the original literal into + // the error literal's symbol so they can be pretty-printed faithfully. + let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let symbol = Symbol::intern(&suffixless_lit.to_string()); + let lit = token::Lit::new(token::Err, symbol, lit.suffix); + Lit::from_lit_token(lit, span).map_err(|_| unreachable!()) + } + } + } + + fn error_literal_from_token(&self, err: LitError, lit: token::Lit, span: Span) { + // Checks if `s` looks like i32 or u1234 etc. + fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 + && s.starts_with(first_chars) + && s[1..].chars().all(|c| c.is_ascii_digit()) + } + + let token::Lit { kind, suffix, .. 
} = lit;
+ match err {
+ // `NotLiteral` is not an error by itself, so we don't report
+ // it and give the parser opportunity to try something else.
+ LitError::NotLiteral => {}
+ // `LexerError` *is* an error, but it was already reported
+ // by lexer, so here we don't report it the second time.
+ LitError::LexerError => {}
+ LitError::InvalidSuffix => {
+ self.expect_no_suffix(
+ span,
+ &format!("{} {} literal", kind.article(), kind.descr()),
+ suffix,
+ );
+ }
+ LitError::InvalidIntSuffix => {
+ let suf = suffix.expect("suffix error with no suffix").as_str();
+ if looks_like_width_suffix(&['i', 'u'], &suf) {
+ // If it looks like a width, try to be helpful.
+ let msg = format!("invalid width `{}` for integer literal", &suf[1..]);
+ self.struct_span_err(span, &msg)
+ .help("valid widths are 8, 16, 32, 64 and 128")
+ .emit();
+ } else {
+ let msg = format!("invalid suffix `{}` for integer literal", suf);
+ self.struct_span_err(span, &msg)
+ .span_label(span, format!("invalid suffix `{}`", suf))
+ .help("the suffix must be one of the integral types (`u32`, `isize`, etc)")
+ .emit();
+ }
+ }
+ LitError::InvalidFloatSuffix => {
+ let suf = suffix.expect("suffix error with no suffix").as_str();
+ if looks_like_width_suffix(&['f'], &suf) {
+ // If it looks like a width, try to be helpful.
+ let msg = format!("invalid width `{}` for float literal", &suf[1..]);
+ self.struct_span_err(span, &msg)
+ .help("valid widths are 32 and 64")
+ .emit();
+ } else {
+ let msg = format!("invalid suffix `{}` for float literal", suf);
+ self.struct_span_err(span, &msg)
+ .span_label(span, format!("invalid suffix `{}`", suf))
+ .help("valid suffixes are `f32` and `f64`")
+ .emit();
+ }
+ }
+ LitError::NonDecimalFloat(base) => {
+ let descr = match base {
+ 16 => "hexadecimal",
+ 8 => "octal",
+ 2 => "binary",
+ _ => unreachable!(),
+ };
+ self.struct_span_err(span, &format!("{} float literal is not supported", descr))
+ .span_label(span, "not supported")
+ .emit();
+ }
+ LitError::IntTooLarge => {
+ self.struct_span_err(span, "integer literal is too large")
+ .emit();
+ }
+ }
+ }
+
+ pub(super) fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option<Symbol>) {
+ if let Some(suf) = suffix {
+ let mut err = if kind == "a tuple index"
+ && [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suf)
+ {
+ // #59553: warn instead of reject out of hand to allow the fix to percolate
+ // through the ecosystem when people fix their macros
+ let mut err = self.sess.span_diagnostic.struct_span_warn(
+ sp,
+ &format!("suffixes on {} are invalid", kind),
+ );
+ err.note(&format!(
+ "`{}` is *temporarily* accepted on tuple index fields as it was \
+ incorrectly accepted on stable for a few releases",
+ suf,
+ ));
+ err.help(
+ "on proc macros, you'll want to use `syn::Index::from` or \
+ `proc_macro::Literal::*_unsuffixed` for code that will desugar \
+ to tuple field access",
+ );
+ err.note(
+ "for more context, see https://github.com/rust-lang/rust/issues/60210",
+ );
+ err
+ } else {
+ self.struct_span_err(sp, &format!("suffixes on {} are invalid", kind))
+ };
+ err.span_label(sp, format!("invalid suffix `{}`", suf));
+ err.emit();
+ }
+ }
+
+ /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
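+ /// For example, `-42` is parsed as the literal `42` wrapped in a unary
+ /// negation expression that spans both the `-` and the literal.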
+ pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
+ maybe_whole_expr!(self);
+
+ let minus_lo = self.token.span;
+ let minus_present = self.eat(&token::BinOp(token::Minus));
+ let lo = self.token.span;
+ let literal = self.parse_lit()?;
+ let hi = self.prev_span;
+ let expr = self.mk_expr(lo.to(hi), ExprKind::Lit(literal), ThinVec::new());
+
+ if minus_present {
+ let minus_hi = self.prev_span;
+ let unary = self.mk_unary(UnOp::Neg, expr);
+ Ok(self.mk_expr(minus_lo.to(minus_hi), unary, ThinVec::new()))
+ } else {
+ Ok(expr)
+ }
+ }
+
+ /// Parses a block or unsafe block.
+ pub(super) fn parse_block_expr(
+ &mut self,
+ opt_label: Option<Label>