about summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
author: Esteban Küber <esteban@kuber.com.ar> 2021-01-23 12:48:31 -0800
committer: Esteban Küber <esteban@kuber.com.ar> 2021-02-03 13:34:28 -0800
commit: 3b5d018ebb6ee8aa6181ebf1cf9ba0cb407f622f (patch)
tree: e714dab8b842b3abe0f3507ae61e250bb2f46105 /compiler/rustc_parse/src/lexer
parent: b81f5811f96fe750ab28c15219d1b0dba6b1dc90 (diff)
download: rust-3b5d018ebb6ee8aa6181ebf1cf9ba0cb407f622f.tar.gz
download: rust-3b5d018ebb6ee8aa6181ebf1cf9ba0cb407f622f.zip
Handle `Span`s for byte and raw strings and add more detail
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r-- compiler/rustc_parse/src/lexer/mod.rs 30
-rw-r--r-- compiler/rustc_parse/src/lexer/unescape_error_reporting.rs 186
2 files changed, 136 insertions, 80 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index b5b34c7338d..4a638ec3f80 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -14,7 +14,7 @@ mod tokentrees;
 mod unescape_error_reporting;
 mod unicode_chars;
 
-use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
+use unescape_error_reporting::{emit_unescape_error, escaped_char};
 
 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
@@ -122,11 +122,9 @@ impl<'a> StringReader<'a> {
         m: &str,
         c: char,
     ) -> DiagnosticBuilder<'a> {
-        let mut m = m.to_string();
-        m.push_str(": ");
-        push_escaped_char(&mut m, c);
-
-        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
+        self.sess
+            .span_diagnostic
+            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
     /// Turns simple `rustc_lexer::TokenKind` enum into a rich
@@ -421,7 +419,7 @@ impl<'a> StringReader<'a> {
         let content_start = start + BytePos(prefix_len);
         let content_end = suffix_start - BytePos(postfix_len);
         let id = self.symbol_from_to(content_start, content_end);
-        self.validate_literal_escape(mode, content_start, content_end);
+        self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len);
         (lit_kind, id)
     }
 
@@ -525,17 +523,29 @@ impl<'a> StringReader<'a> {
         .raise();
     }
 
-    fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) {
+    fn validate_literal_escape(
+        &self,
+        mode: Mode,
+        content_start: BytePos,
+        content_end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) {
         let lit_content = self.str_from_to(content_start, content_end);
         unescape::unescape_literal(lit_content, mode, &mut |range, result| {
             // Here we only check for errors. The actual unescaping is done later.
             if let Err(err) = result {
-                let span_with_quotes =
-                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
+                let span_with_quotes = self
+                    .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len));
+                let (start, end) = (range.start as u32, range.end as u32);
+                let lo = content_start + BytePos(start);
+                let hi = lo + BytePos(end - start);
+                let span = self.mk_sp(lo, hi);
                 emit_unescape_error(
                     &self.sess.span_diagnostic,
                     lit_content,
                     span_with_quotes,
+                    span,
                     mode,
                     range,
                     err,
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 47d317f9188..a580f0c55d0 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -13,6 +13,8 @@ pub(crate) fn emit_unescape_error(
     lit: &str,
     // full span of the literal, including quotes
     span_with_quotes: Span,
+    // interior span of the literal, without quotes
+    span: Span,
     mode: Mode,
     // range of the error inside `lit`
     range: Range<usize>,
@@ -26,13 +28,6 @@ pub(crate) fn emit_unescape_error(
         range,
         error
     );
-    let span = {
-        let Range { start, end } = range;
-        let (start, end) = (start as u32, end as u32);
-        let lo = span_with_quotes.lo() + BytePos(start + 1);
-        let hi = lo + BytePos(end - start);
-        span_with_quotes.with_lo(lo).with_hi(hi)
-    };
     let last_char = || {
         let c = lit[range.clone()].chars().rev().next().unwrap();
         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
@@ -42,20 +37,22 @@ pub(crate) fn emit_unescape_error(
         EscapeError::LoneSurrogateUnicodeEscape => {
             handler
                 .struct_span_err(span, "invalid unicode character escape")
+                .span_label(span, "invalid escape")
                 .help("unicode escape must not be a surrogate")
                 .emit();
         }
         EscapeError::OutOfRangeUnicodeEscape => {
             handler
                 .struct_span_err(span, "invalid unicode character escape")
+                .span_label(span, "invalid escape")
                 .help("unicode escape must be at most 10FFFF")
                 .emit();
         }
         EscapeError::MoreThanOneChar => {
-            let msg = if mode.is_bytes() {
-                "if you meant to write a byte string literal, use double quotes"
+            let (prefix, msg) = if mode.is_bytes() {
+                ("b", "if you meant to write a byte string literal, use double quotes")
             } else {
-                "if you meant to write a `str` literal, use double quotes"
+                ("", "if you meant to write a `str` literal, use double quotes")
             };
 
             handler
@@ -66,31 +63,44 @@ pub(crate) fn emit_unescape_error(
                 .span_suggestion(
                     span_with_quotes,
                     msg,
-                    format!("\"{}\"", lit),
+                    format!("{}\"{}\"", prefix, lit),
                     Applicability::MachineApplicable,
                 )
                 .emit();
         }
         EscapeError::EscapeOnlyChar => {
-            let (c, _span) = last_char();
+            let (c, char_span) = last_char();
 
-            let mut msg = if mode.is_bytes() {
-                "byte constant must be escaped: "
+            let msg = if mode.is_bytes() {
+                "byte constant must be escaped"
             } else {
-                "character constant must be escaped: "
-            }
-            .to_string();
-            push_escaped_char(&mut msg, c);
-
-            handler.span_err(span, msg.as_str())
+                "character constant must be escaped"
+            };
+            handler
+                .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
+                .span_suggestion(
+                    char_span,
+                    "escape the character",
+                    c.escape_default().to_string(),
+                    Applicability::MachineApplicable,
+                )
+                .emit()
         }
         EscapeError::BareCarriageReturn => {
             let msg = if mode.in_double_quotes() {
-                "bare CR not allowed in string, use \\r instead"
+                "bare CR not allowed in string, use `\\r` instead"
             } else {
-                "character constant must be escaped: \\r"
+                "character constant must be escaped: `\\r`"
             };
-            handler.span_err(span, msg);
+            handler
+                .struct_span_err(span, msg)
+                .span_suggestion(
+                    span,
+                    "escape the character",
+                    "\\r".to_string(),
+                    Applicability::MachineApplicable,
+                )
+                .emit();
         }
         EscapeError::BareCarriageReturnInRawString => {
             assert!(mode.in_double_quotes());
@@ -102,21 +112,22 @@ pub(crate) fn emit_unescape_error(
 
             let label =
                 if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
-            let mut msg = label.to_string();
-            msg.push_str(": ");
-            push_escaped_char(&mut msg, c);
-
-            let mut diag = handler.struct_span_err(span, msg.as_str());
+            let ec = escaped_char(c);
+            let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
             diag.span_label(span, label);
             if c == '{' || c == '}' && !mode.is_bytes() {
                 diag.help(
-                    "if used in a formatting string, \
-                           curly braces are escaped with `{{` and `}}`",
+                    "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
                 );
             } else if c == '\r' {
                 diag.help(
-                    "this is an isolated carriage return; \
-                           consider checking your editor and version control settings",
+                    "this is an isolated carriage return; consider checking your editor and \
+                     version control settings",
+                );
+            } else {
+                diag.help(
+                    "for more information, visit \
+                     <https://static.rust-lang.org/doc/master/reference.html#literals>",
                 );
             }
             diag.emit();
@@ -127,45 +138,70 @@ pub(crate) fn emit_unescape_error(
         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
             let (c, span) = last_char();
 
-            let mut msg = if error == EscapeError::InvalidCharInHexEscape {
-                "invalid character in numeric character escape: "
+            let msg = if error == EscapeError::InvalidCharInHexEscape {
+                "invalid character in numeric character escape"
             } else {
-                "invalid character in unicode escape: "
-            }
-            .to_string();
-            push_escaped_char(&mut msg, c);
+                "invalid character in unicode escape"
+            };
+            let c = escaped_char(c);
 
-            handler.span_err(span, msg.as_str())
+            handler
+                .struct_span_err(span, &format!("{}: `{}`", msg, c))
+                .span_label(span, msg)
+                .emit();
         }
         EscapeError::NonAsciiCharInByte => {
             assert!(mode.is_bytes());
-            let (_c, span) = last_char();
-            handler.span_err(
-                span,
-                "byte constant must be ASCII. \
-                                    Use a \\xHH escape for a non-ASCII byte",
-            )
+            let (c, span) = last_char();
+            handler
+                .struct_span_err(span, "non-ASCII character in byte constant")
+                .span_label(span, "byte constant must be ASCII")
+                .span_suggestion(
+                    span,
+                    "use a \\xHH escape for a non-ASCII byte",
+                    format!("\\x{:X}", c as u32),
+                    Applicability::MachineApplicable,
+                )
+                .emit();
         }
         EscapeError::NonAsciiCharInByteString => {
             assert!(mode.is_bytes());
             let (_c, span) = last_char();
-            handler.span_err(span, "raw byte string must be ASCII")
+            handler
+                .struct_span_err(span, "raw byte string must be ASCII")
+                .span_label(span, "must be ASCII")
+                .emit();
+        }
+        EscapeError::OutOfRangeHexEscape => {
+            handler
+                .struct_span_err(span, "out of range hex escape")
+                .span_label(span, "must be a character in the range [\\x00-\\x7f]")
+                .emit();
         }
-        EscapeError::OutOfRangeHexEscape => handler.span_err(
-            span,
-            "this form of character escape may only be used \
-                                    with characters in the range [\\x00-\\x7f]",
-        ),
         EscapeError::LeadingUnderscoreUnicodeEscape => {
-            let (_c, span) = last_char();
-            handler.span_err(span, "invalid start of unicode escape")
+            let (c, span) = last_char();
+            let msg = "invalid start of unicode escape";
+            handler
+                .struct_span_err(span, &format!("{}: `{}`", msg, c))
+                .span_label(span, msg)
+                .emit();
         }
         EscapeError::OverlongUnicodeEscape => {
-            handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
-        }
-        EscapeError::UnclosedUnicodeEscape => {
-            handler.span_err(span, "unterminated unicode escape (needed a `}`)")
+            handler
+                .struct_span_err(span, "overlong unicode escape")
+                .span_label(span, "must have at most 6 hex digits")
+                .emit();
         }
+        EscapeError::UnclosedUnicodeEscape => handler
+            .struct_span_err(span, "unterminated unicode escape")
+            .span_label(span, "missing a closing `}`")
+            .span_suggestion_verbose(
+                span.shrink_to_hi(),
+                "terminate the unicode escape",
+                "}".to_string(),
+                Applicability::MaybeIncorrect,
+            )
+            .emit(),
         EscapeError::NoBraceInUnicodeEscape => {
             let msg = "incorrect unicode escape sequence";
             let mut diag = handler.struct_span_err(span, msg);
@@ -195,28 +231,38 @@ pub(crate) fn emit_unescape_error(
 
             diag.emit();
         }
-        EscapeError::UnicodeEscapeInByte => handler.span_err(
-            span,
-            "unicode escape sequences cannot be used \
-                                    as a byte or in a byte string",
-        ),
+        EscapeError::UnicodeEscapeInByte => {
+            let msg = "unicode escape in byte string";
+            handler
+                .struct_span_err(span, msg)
+                .span_label(span, msg)
+                .help("unicode escape sequences cannot be used as a byte or in a byte string")
+                .emit();
+        }
         EscapeError::EmptyUnicodeEscape => {
-            handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
+            handler
+                .struct_span_err(span, "empty unicode escape")
+                .span_label(span, "this escape must have at least 1 hex digit")
+                .emit();
+        }
+        EscapeError::ZeroChars => {
+            let msg = "empty character literal";
+            handler.struct_span_err(span, msg).span_label(span, msg).emit()
+        }
+        EscapeError::LoneSlash => {
+            let msg = "invalid trailing slash in literal";
+            handler.struct_span_err(span, msg).span_label(span, msg).emit();
         }
-        EscapeError::ZeroChars => handler.span_err(span, "empty character literal"),
-        EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"),
     }
 }
 
 /// Pushes a character to a message string for error reporting
-pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
+pub(crate) fn escaped_char(c: char) -> String {
     match c {
         '\u{20}'..='\u{7e}' => {
             // Don't escape \, ' or " for user-facing messages
-            msg.push(c);
-        }
-        _ => {
-            msg.extend(c.escape_default());
+            c.to_string()
         }
+        _ => c.escape_default().to_string(),
     }
 }