diff options
| author | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2022-12-27 22:15:25 +0100 |
|---|---|---|
| committer | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2022-12-28 17:43:18 +0100 |
| commit | e6c02aad9345925cfed74f86b414c4d0715d381b (patch) | |
| tree | 98fd6b6b4199045d3c413f5cec5318dbb9b8ce22 /compiler/rustc_parse_format | |
| parent | 1322e476bf5eecfad98f0b200f15c1b46a0d46d2 (diff) | |
| download | rust-e6c02aad9345925cfed74f86b414c4d0715d381b.tar.gz rust-e6c02aad9345925cfed74f86b414c4d0715d381b.zip | |
Improve heuristics whether `format_args` string is a source literal
Previously, it only checked whether there was _a_ literal at the span of the first argument, not whether the literal actually matched up. This caused issues when a proc macro was generating a different literal with the same span. This requires an annoying special case for literals ending in `\n` because otherwise `println` wouldn't give detailed diagnostics anymore, which would be bad.
Diffstat (limited to 'compiler/rustc_parse_format')
| -rw-r--r-- | compiler/rustc_parse_format/src/lib.rs | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index ab0463045fa..84243e53faf 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -20,6 +20,7 @@ pub use Flag::*; pub use Piece::*; pub use Position::*; +use rustc_lexer::unescape; use std::iter; use std::str; use std::string; @@ -306,7 +307,7 @@ impl<'a> Parser<'a> { append_newline: bool, mode: ParseMode, ) -> Parser<'a> { - let (width_map, is_literal) = find_width_map_from_snippet(snippet, style); + let (width_map, is_literal) = find_width_map_from_snippet(s, snippet, style); Parser { mode, input: s, @@ -844,6 +845,7 @@ impl<'a> Parser<'a> { /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` /// in order to properly synthesise the intra-string `Span`s for error diagnostics. fn find_width_map_from_snippet( + input: &str, snippet: Option<string::String>, str_style: Option<usize>, ) -> (Vec<InnerWidthMapping>, bool) { @@ -856,8 +858,27 @@ fn find_width_map_from_snippet( return (vec![], true); } + // Strip quotes. let snippet = &snippet[1..snippet.len() - 1]; + // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine + // since we will never need to point our spans there, so we lie about it here by ignoring it. + // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines. + // If we only trimmed it off the input, `format!("\n")` would cause a mismatch even though here they actually match up. + // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up. 
+ let input_no_nl = input.trim_end_matches('\n'); + let Ok(unescaped) = unescape_string(snippet) else { + return (vec![], false); + }; + + let unescaped_no_nl = unescaped.trim_end_matches('\n'); + + if unescaped_no_nl != input_no_nl { + // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect. + // This can for example happen with proc macros that respan generated literals. + return (vec![], false); + } + let mut s = snippet.char_indices(); let mut width_mappings = vec![]; while let Some((pos, c)) = s.next() { @@ -936,9 +957,23 @@ fn find_width_map_from_snippet( _ => {} } } + (width_mappings, true) } +fn unescape_string(string: &str) -> Result<string::String, unescape::EscapeError> { + let mut buf = string::String::new(); + let mut error = Ok(()); + unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(err) => error = Err(err), + } + }); + + error.map(|_| buf) +} + // Assert a reasonable size for `Piece` #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] rustc_data_structures::static_assert_size!(Piece<'_>, 16); |
