about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-12-29 11:20:50 +0000
committerbors <bors@rust-lang.org>2022-12-29 11:20:50 +0000
commit0c0b403f19fc6febcd1e36a83fc307ecc11de943 (patch)
tree461a78f80026dd6bec5e42bfca94331c1ccaf9fa
parent11a338ab6644cf454c45d2b41651900610a55b07 (diff)
parent31b490d8ba8ff60b9d9ee3ccca522629429d9a3f (diff)
downloadrust-0c0b403f19fc6febcd1e36a83fc307ecc11de943.tar.gz
rust-0c0b403f19fc6febcd1e36a83fc307ecc11de943.zip
Auto merge of #106195 - Nilstrieb:no-more-being-clueless-whether-it-really-is-a-literal, r=compiler-errors
Improve heuristics whether `format_args` string is a source literal

Previously, it only checked whether there was _a_ literal at the span of the first argument, not whether the literal actually matched up. This caused issues when a proc macro was generating a different literal with the same span.

This requires an annoying special case for literals ending in `\n` because otherwise `println` wouldn't give detailed diagnostics anymore which would be bad.

Fixes #106191
-rw-r--r--compiler/rustc_parse_format/src/lib.rs56
-rw-r--r--compiler/rustc_span/src/source_map.rs29
-rw-r--r--src/test/ui/fmt/auxiliary/format-string-proc-macro.rs14
-rw-r--r--src/test/ui/fmt/respanned-literal-issue-106191.rs10
-rw-r--r--src/test/ui/fmt/respanned-literal-issue-106191.stderr19
5 files changed, 105 insertions, 23 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
index ab0463045fa..9f2aaca0acf 100644
--- a/compiler/rustc_parse_format/src/lib.rs
+++ b/compiler/rustc_parse_format/src/lib.rs
@@ -20,6 +20,7 @@ pub use Flag::*;
 pub use Piece::*;
 pub use Position::*;
 
+use rustc_lexer::unescape;
 use std::iter;
 use std::str;
 use std::string;
@@ -56,6 +57,13 @@ impl InnerWidthMapping {
     }
 }
 
+/// Whether the input string is a literal. If yes, it contains the inner width mappings.
+#[derive(Clone, PartialEq, Eq)]
+enum InputStringKind {
+    NotALiteral,
+    Literal { width_mappings: Vec<InnerWidthMapping> },
+}
+
 /// The type of format string that we are parsing.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum ParseMode {
@@ -306,7 +314,11 @@ impl<'a> Parser<'a> {
         append_newline: bool,
         mode: ParseMode,
     ) -> Parser<'a> {
-        let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
+        let input_string_kind = find_width_map_from_snippet(s, snippet, style);
+        let (width_map, is_literal) = match input_string_kind {
+            InputStringKind::Literal { width_mappings } => (width_mappings, true),
+            InputStringKind::NotALiteral => (Vec::new(), false),
+        };
         Parser {
             mode,
             input: s,
@@ -844,20 +856,40 @@ impl<'a> Parser<'a> {
 /// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
 /// in order to properly synthesise the intra-string `Span`s for error diagnostics.
 fn find_width_map_from_snippet(
+    input: &str,
     snippet: Option<string::String>,
     str_style: Option<usize>,
-) -> (Vec<InnerWidthMapping>, bool) {
+) -> InputStringKind {
     let snippet = match snippet {
         Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
-        _ => return (vec![], false),
+        _ => return InputStringKind::NotALiteral,
     };
 
     if str_style.is_some() {
-        return (vec![], true);
+        return InputStringKind::Literal { width_mappings: Vec::new() };
     }
 
+    // Strip quotes.
     let snippet = &snippet[1..snippet.len() - 1];
 
+    // Macros like `println` add a newline at the end. That technically doens't make them "literals" anymore, but it's fine
+    // since we will never need to point our spans there, so we lie about it here by ignoring it.
+    // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
+    // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here we they actually match up.
+    // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
+    let input_no_nl = input.trim_end_matches('\n');
+    let Ok(unescaped) = unescape_string(snippet) else {
+        return InputStringKind::NotALiteral;
+    };
+
+    let unescaped_no_nl = unescaped.trim_end_matches('\n');
+
+    if unescaped_no_nl != input_no_nl {
+        // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
+        // This can for example happen with proc macros that respan generated literals.
+        return InputStringKind::NotALiteral;
+    }
+
     let mut s = snippet.char_indices();
     let mut width_mappings = vec![];
     while let Some((pos, c)) = s.next() {
@@ -936,7 +968,21 @@ fn find_width_map_from_snippet(
             _ => {}
         }
     }
-    (width_mappings, true)
+
+    InputStringKind::Literal { width_mappings }
+}
+
+fn unescape_string(string: &str) -> Result<string::String, unescape::EscapeError> {
+    let mut buf = string::String::new();
+    let mut error = Ok(());
+    unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
+        match unescaped_char {
+            Ok(c) => buf.push(c),
+            Err(err) => error = Err(err),
+        }
+    });
+
+    error.map(|_| buf)
 }
 
 // Assert a reasonable size for `Piece`
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index d9c87ac0ba8..fa09b4faa44 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -964,45 +964,40 @@ impl SourceMap {
 
     /// Finds the width of the character, either before or after the end of provided span,
     /// depending on the `forwards` parameter.
+    #[instrument(skip(self, sp))]
     fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
         let sp = sp.data();
 
         if sp.lo == sp.hi && !forwards {
-            debug!("find_width_of_character_at_span: early return empty span");
+            debug!("early return empty span");
             return 1;
         }
 
         let local_begin = self.lookup_byte_offset(sp.lo);
         let local_end = self.lookup_byte_offset(sp.hi);
-        debug!(
-            "find_width_of_character_at_span: local_begin=`{:?}`, local_end=`{:?}`",
-            local_begin, local_end
-        );
+        debug!("local_begin=`{:?}`, local_end=`{:?}`", local_begin, local_end);
 
         if local_begin.sf.start_pos != local_end.sf.start_pos {
-            debug!("find_width_of_character_at_span: begin and end are in different files");
+            debug!("begin and end are in different files");
             return 1;
         }
 
         let start_index = local_begin.pos.to_usize();
         let end_index = local_end.pos.to_usize();
-        debug!(
-            "find_width_of_character_at_span: start_index=`{:?}`, end_index=`{:?}`",
-            start_index, end_index
-        );
+        debug!("start_index=`{:?}`, end_index=`{:?}`", start_index, end_index);
 
         // Disregard indexes that are at the start or end of their spans, they can't fit bigger
         // characters.
         if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) {
-            debug!("find_width_of_character_at_span: start or end of span, cannot be multibyte");
+            debug!("start or end of span, cannot be multibyte");
             return 1;
         }
 
         let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
-        debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len);
+        debug!("source_len=`{:?}`", source_len);
         // Ensure indexes are also not malformed.
         if start_index > end_index || end_index > source_len - 1 {
-            debug!("find_width_of_character_at_span: source indexes are malformed");
+            debug!("source indexes are malformed");
             return 1;
         }
 
@@ -1017,10 +1012,10 @@ impl SourceMap {
         } else {
             return 1;
         };
-        debug!("find_width_of_character_at_span: snippet=`{:?}`", snippet);
+        debug!("snippet=`{:?}`", snippet);
 
         let mut target = if forwards { end_index + 1 } else { end_index - 1 };
-        debug!("find_width_of_character_at_span: initial target=`{:?}`", target);
+        debug!("initial target=`{:?}`", target);
 
         while !snippet.is_char_boundary(target - start_index) && target < source_len {
             target = if forwards {
@@ -1033,9 +1028,9 @@ impl SourceMap {
                     }
                 }
             };
-            debug!("find_width_of_character_at_span: target=`{:?}`", target);
+            debug!("target=`{:?}`", target);
         }
-        debug!("find_width_of_character_at_span: final target=`{:?}`", target);
+        debug!("final target=`{:?}`", target);
 
         if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 }
     }
diff --git a/src/test/ui/fmt/auxiliary/format-string-proc-macro.rs b/src/test/ui/fmt/auxiliary/format-string-proc-macro.rs
index e44a84776bc..539c8fb27b3 100644
--- a/src/test/ui/fmt/auxiliary/format-string-proc-macro.rs
+++ b/src/test/ui/fmt/auxiliary/format-string-proc-macro.rs
@@ -5,7 +5,8 @@
 
 extern crate proc_macro;
 
-use proc_macro::{Literal, Span, TokenStream, TokenTree};
+use proc_macro::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
+use std::iter::FromIterator;
 
 #[proc_macro]
 pub fn foo_with_input_span(input: TokenStream) -> TokenStream {
@@ -26,3 +27,14 @@ pub fn err_with_input_span(input: TokenStream) -> TokenStream {
 
     TokenStream::from(TokenTree::Literal(lit))
 }
+
+#[proc_macro]
+pub fn respan_to_invalid_format_literal(input: TokenStream) -> TokenStream {
+    let mut s = Literal::string("{");
+    s.set_span(input.into_iter().next().unwrap().span());
+    TokenStream::from_iter([
+        TokenTree::from(Ident::new("format", Span::call_site())),
+        TokenTree::from(Punct::new('!', Spacing::Alone)),
+        TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())),
+    ])
+}
diff --git a/src/test/ui/fmt/respanned-literal-issue-106191.rs b/src/test/ui/fmt/respanned-literal-issue-106191.rs
new file mode 100644
index 00000000000..44642a10fc0
--- /dev/null
+++ b/src/test/ui/fmt/respanned-literal-issue-106191.rs
@@ -0,0 +1,10 @@
+// aux-build:format-string-proc-macro.rs
+
+extern crate format_string_proc_macro;
+
+fn main() {
+    format_string_proc_macro::respan_to_invalid_format_literal!("¡");
+    //~^ ERROR invalid format string: expected `'}'` but string was terminated
+    format_args!(r#concat!("¡        {"));
+    //~^ ERROR invalid format string: expected `'}'` but string was terminated
+}
diff --git a/src/test/ui/fmt/respanned-literal-issue-106191.stderr b/src/test/ui/fmt/respanned-literal-issue-106191.stderr
new file mode 100644
index 00000000000..73a3af65a38
--- /dev/null
+++ b/src/test/ui/fmt/respanned-literal-issue-106191.stderr
@@ -0,0 +1,19 @@
+error: invalid format string: expected `'}'` but string was terminated
+  --> $DIR/respanned-literal-issue-106191.rs:6:65
+   |
+LL |     format_string_proc_macro::respan_to_invalid_format_literal!("¡");
+   |                                                                 ^^^ expected `'}'` in format string
+   |
+   = note: if you intended to print `{`, you can escape it using `{{`
+
+error: invalid format string: expected `'}'` but string was terminated
+  --> $DIR/respanned-literal-issue-106191.rs:8:18
+   |
+LL |     format_args!(r#concat!("¡        {"));
+   |                  ^^^^^^^^^^^^^^^^^^^^^^^ expected `'}'` in format string
+   |
+   = note: if you intended to print `{`, you can escape it using `{{`
+   = note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: aborting due to 2 previous errors
+