about summary refs log tree commit diff
path: root/compiler/rustc_parse_format/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-12-26 12:03:47 +0000
committerbors <bors@rust-lang.org>2022-12-26 12:03:47 +0000
commit731e0bf721c1ec2c7843547e86b6157b40a437d1 (patch)
tree2adf07470f23ec77fecd22bae1c7c1366ebe9da6 /compiler/rustc_parse_format/src
parentf206533fd40378da6e2a07567e8d7592edd13ee4 (diff)
parent35c7939ae5d3a0e34963bbd57bbc4b91c3ce0ce3 (diff)
downloadrust-731e0bf721c1ec2c7843547e86b6157b40a437d1.tar.gz
rust-731e0bf721c1ec2c7843547e86b6157b40a437d1.zip
Auto merge of #103828 - cassaundra:fix-format-args-span2, r=cjgillot
Fix incorrect span when using byte-escaped rbrace

Fix #103826, a format args span issue introduced in #102214.

The current solution for tracking skipped characters made it so that certain situations were ambiguous enough that the original span couldn't be worked out later. This PR improves on the original solution by keeping track of groups of skipped characters using a map, and fixes the previous bug. See an example of this ambiguity in the [previous PR's discussion](https://github.com/rust-lang/rust/pull/102214#issuecomment-1258711015).
Diffstat (limited to 'compiler/rustc_parse_format/src')
-rw-r--r--compiler/rustc_parse_format/src/lib.rs127
1 files changed, 79 insertions, 48 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
index 9cbe04c1288..ab0463045fa 100644
--- a/compiler/rustc_parse_format/src/lib.rs
+++ b/compiler/rustc_parse_format/src/lib.rs
@@ -38,6 +38,24 @@ impl InnerSpan {
     }
 }
 
+/// The location and before/after width of a character whose width has changed from its source code
+/// representation
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct InnerWidthMapping {
+    /// Index of the character in the source
+    pub position: usize,
+    /// The inner width in characters
+    pub before: usize,
+    /// The transformed width in characters
+    pub after: usize,
+}
+
+impl InnerWidthMapping {
+    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
+        InnerWidthMapping { position, before, after }
+    }
+}
+
 /// The type of format string that we are parsing.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum ParseMode {
@@ -200,8 +218,8 @@ pub struct Parser<'a> {
     style: Option<usize>,
     /// Start and end byte offset of every successfully parsed argument
     pub arg_places: Vec<InnerSpan>,
-    /// Characters that need to be shifted
-    skips: Vec<usize>,
+    /// Characters whose length has been changed from their in-code representation
+    width_map: Vec<InnerWidthMapping>,
     /// Span of the last opening brace seen, used for error reporting
     last_opening_brace: Option<InnerSpan>,
     /// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
@@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
                 '{' => {
                     let curr_last_brace = self.last_opening_brace;
                     let byte_pos = self.to_span_index(pos);
-                    let lbrace_end = self.to_span_index(pos + 1);
+                    let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
                     self.last_opening_brace = Some(byte_pos.to(lbrace_end));
                     self.cur.next();
                     if self.consume('{') {
@@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
                         Some(String(self.string(pos + 1)))
                     } else {
                         let arg = self.argument(lbrace_end);
-                        if let Some(rbrace_byte_idx) = self.must_consume('}') {
-                            let lbrace_inner_offset = self.to_span_index(pos);
-                            let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
+                        if let Some(rbrace_pos) = self.must_consume('}') {
                             if self.is_literal {
+                                let lbrace_byte_pos = self.to_span_index(pos);
+                                let rbrace_byte_pos = self.to_span_index(rbrace_pos);
+
+                                let width = self.to_span_width(rbrace_pos);
+
                                 self.arg_places.push(
-                                    lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
+                                    lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
                                 );
                             }
                         } else {
@@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
         append_newline: bool,
         mode: ParseMode,
     ) -> Parser<'a> {
-        let (skips, is_literal) = find_skips_from_snippet(snippet, style);
+        let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
         Parser {
             mode,
             input: s,
@@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
             curarg: 0,
             style,
             arg_places: vec![],
-            skips,
+            width_map,
             last_opening_brace: None,
             append_newline,
             is_literal,
@@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
         None
     }
 
+    fn remap_pos(&self, mut pos: usize) -> InnerOffset {
+        for width in &self.width_map {
+            if pos > width.position {
+                pos += width.before - width.after;
+            } else if pos == width.position && width.after == 0 {
+                pos += width.before;
+            } else {
+                break;
+            }
+        }
+
+        InnerOffset(pos)
+    }
+
     fn to_span_index(&self, pos: usize) -> InnerOffset {
-        let mut pos = pos;
         // This handles the raw string case, the raw argument is the number of #
         // in r###"..."### (we need to add one because of the `r`).
         let raw = self.style.map_or(0, |raw| raw + 1);
-        for skip in &self.skips {
-            if pos > *skip {
-                pos += 1;
-            } else if pos == *skip && raw == 0 {
-                pos += 1;
-            } else {
-                break;
-            }
+        let pos = self.remap_pos(pos);
+        InnerOffset(raw + pos.0 + 1)
+    }
+
+    fn to_span_width(&self, pos: usize) -> usize {
+        let pos = self.remap_pos(pos);
+        match self.width_map.iter().find(|w| w.position == pos.0) {
+            Some(w) => w.before,
+            None => 1,
         }
-        InnerOffset(raw + pos + 1)
     }
 
     fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
@@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
 /// Finds the indices of all characters that have been processed and differ between the actual
 /// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
 /// in order to properly synthesise the intra-string `Span`s for error diagnostics.
-fn find_skips_from_snippet(
+fn find_width_map_from_snippet(
     snippet: Option<string::String>,
     str_style: Option<usize>,
-) -> (Vec<usize>, bool) {
+) -> (Vec<InnerWidthMapping>, bool) {
     let snippet = match snippet {
         Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
         _ => return (vec![], false),
@@ -825,43 +859,39 @@ fn find_skips_from_snippet(
     let snippet = &snippet[1..snippet.len() - 1];
 
     let mut s = snippet.char_indices();
-    let mut skips = vec![];
+    let mut width_mappings = vec![];
     while let Some((pos, c)) = s.next() {
         match (c, s.clone().next()) {
             // skip whitespace and empty lines ending in '\\'
-            ('\\', Some((next_pos, '\n'))) => {
-                skips.push(pos);
-                skips.push(next_pos);
+            ('\\', Some((_, '\n'))) => {
                 let _ = s.next();
+                let mut width = 2;
 
-                while let Some((pos, c)) = s.clone().next() {
+                while let Some((_, c)) = s.clone().next() {
                     if matches!(c, ' ' | '\n' | '\t') {
-                        skips.push(pos);
+                        width += 1;
                         let _ = s.next();
                     } else {
                         break;
                     }
                 }
+
+                width_mappings.push(InnerWidthMapping::new(pos, width, 0));
             }
-            ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
-                skips.push(next_pos);
+            ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
+                width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
                 let _ = s.next();
             }
             ('\\', Some((_, 'x'))) => {
-                for _ in 0..3 {
-                    // consume `\xAB` literal
-                    if let Some((pos, _)) = s.next() {
-                        skips.push(pos);
-                    } else {
-                        break;
-                    }
-                }
+                // consume `\xAB` literal
+                s.nth(2);
+                width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
             }
             ('\\', Some((_, 'u'))) => {
-                if let Some((pos, _)) = s.next() {
-                    skips.push(pos);
-                }
-                if let Some((next_pos, next_c)) = s.next() {
+                let mut width = 2;
+                let _ = s.next();
+
+                if let Some((_, next_c)) = s.next() {
                     if next_c == '{' {
                         // consume up to 6 hexanumeric chars
                         let digits_len =
@@ -881,19 +911,18 @@ fn find_skips_from_snippet(
                         let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
 
                         // skip '{' and '}' also
-                        for pos in (next_pos..).take(required_skips + 2) {
-                            skips.push(pos)
-                        }
+                        width += required_skips + 2;
 
                         s.nth(digits_len);
                     } else if next_c.is_digit(16) {
-                        skips.push(next_pos);
+                        width += 1;
+
                         // We suggest adding `{` and `}` when appropriate, accept it here as if
                         // it were correct
                         let mut i = 0; // consume up to 6 hexanumeric chars
-                        while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
+                        while let (Some((_, c)), _) = (s.next(), i < 6) {
                             if c.is_digit(16) {
-                                skips.push(next_pos);
+                                width += 1;
                             } else {
                                 break;
                             }
@@ -901,11 +930,13 @@ fn find_skips_from_snippet(
                         }
                     }
                 }
+
+                width_mappings.push(InnerWidthMapping::new(pos, width, 1));
             }
             _ => {}
         }
     }
-    (skips, true)
+    (width_mappings, true)
 }
 
 // Assert a reasonable size for `Piece`