about summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
authorclubby789 <jamie@hill-daniel.co.uk>2023-01-29 13:37:05 +0000
committerclubby789 <jamie@hill-daniel.co.uk>2023-02-06 14:40:35 +0000
commit521c5f36d69b9056c0147502d12426fbd9b6c9c4 (patch)
treed5424bdff9591b5e7ff449e5d6a06ac5cd8ae060 /compiler/rustc_parse/src/lexer
parente7813fee92c56621d08e8dbe83948d9f4a30a9ec (diff)
downloadrust-521c5f36d69b9056c0147502d12426fbd9b6c9c4.tar.gz
rust-521c5f36d69b9056c0147502d12426fbd9b6c9c4.zip
Migrate `rustc_parse` to derive diagnostics
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs159
-rw-r--r--compiler/rustc_parse/src/lexer/unescape_error_reporting.rs248
-rw-r--r--compiler/rustc_parse/src/lexer/unicode_chars.rs60
3 files changed, 150 insertions, 317 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e957224a033..bd998ed91d9 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,11 +1,10 @@
+use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
-    error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, DiagnosticBuilder, PResult, StashKey};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
@@ -151,7 +150,7 @@ impl<'a> StringReader<'a> {
                     let span = self.mk_sp(start, self.pos);
                     self.sess.symbol_gallery.insert(sym, span);
                     if !sym.can_be_raw() {
-                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+                        self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
                     }
                     self.sess.raw_identifier_spans.borrow_mut().push(span);
                     token::Ident(sym, true)
@@ -262,27 +261,24 @@ impl<'a> StringReader<'a> {
                         self.nbsp_is_whitespace = true;
                     }
                     let repeats = it.take_while(|c1| *c1 == c).count();
-                    let mut err =
-                        self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
                     // FIXME: the lexer could be used to turn the ASCII version of unicode
                     // homoglyphs, instead of keeping a table in `check_for_substitution`into the
                     // token. Ideally, this should be inside `rustc_lexer`. However, we should
                     // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                     // fancier error recovery to it, as there will be less overall work to do this
                     // way.
-                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
-                    if c == '\x00' {
-                        err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
-                    }
-                    if repeats > 0 {
-                        if repeats == 1 {
-                            err.note(format!("character appears once more"));
-                        } else {
-                            err.note(format!("character appears {repeats} more times"));
-                        }
-                        swallow_next_invalid = repeats;
-                    }
-                    err.emit();
+                    let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+                    self.sess.emit_err(errors::UnknownTokenStart {
+                        span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+                        escaped: escaped_char(c),
+                        sugg,
+                        null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+                        repeat: if repeats > 0 {
+                            swallow_next_invalid = repeats;
+                            Some(errors::UnknownTokenRepeat { repeats })
+                        } else {None}
+                    });
+
                     if let Some(token) = token {
                         token
                     } else {
@@ -297,26 +293,6 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> ! {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
-        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        self.err_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
     fn struct_fatal_span_char(
         &self,
         from_pos: BytePos,
@@ -329,18 +305,6 @@ impl<'a> StringReader<'a> {
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
-    fn struct_err_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
-        self.sess
-            .span_diagnostic
-            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
     /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
     /// complain about it.
     fn lint_unicode_text_flow(&self, start: BytePos) {
@@ -368,14 +332,12 @@ impl<'a> StringReader<'a> {
     ) -> TokenKind {
         if content.contains('\r') {
             for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
-                self.err_span_(
+                let span = self.mk_sp(
                     content_start + BytePos(idx as u32),
                     content_start + BytePos(idx as u32 + 1),
-                    match comment_kind {
-                        CommentKind::Line => "bare CR not allowed in doc-comment",
-                        CommentKind::Block => "bare CR not allowed in block doc-comment",
-                    },
                 );
+                let block = matches!(comment_kind, CommentKind::Block);
+                self.sess.emit_err(errors::CrDocComment { span, block });
             }
         }
 
@@ -454,26 +416,20 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
-                    self.sess
-                        .span_diagnostic
-                        .struct_span_err_with_code(
-                            self.mk_sp(start, end),
-                            "no valid digits found for number",
-                            error_code!(E0768),
-                        )
-                        .emit();
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::NoDigitsLiteral { span });
                     (token::Integer, sym::integer(0))
                 } else {
                     if matches!(base, Base::Binary | Base::Octal) {
                         let base = base as u32;
                         let s = self.str_from_to(start + BytePos(2), end);
                         for (idx, c) in s.char_indices() {
+                            let span = self.mk_sp(
+                                start + BytePos::from_usize(2 + idx),
+                                start + BytePos::from_usize(2 + idx + c.len_utf8()),
+                            );
                             if c != '_' && c.to_digit(base).is_none() {
-                                self.err_span_(
-                                    start + BytePos::from_usize(2 + idx),
-                                    start + BytePos::from_usize(2 + idx + c.len_utf8()),
-                                    &format!("invalid digit for a base {} literal", base),
-                                );
+                                self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
                             }
                         }
                     }
@@ -482,19 +438,18 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                 if empty_exponent {
-                    self.err_span_(start, self.pos, "expected at least one digit in exponent");
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.emit_err(errors::EmptyExponentFloat { span });
                 }
-                match base {
-                    Base::Hexadecimal => {
-                        self.err_span_(start, end, "hexadecimal float literal is not supported")
-                    }
-                    Base::Octal => {
-                        self.err_span_(start, end, "octal float literal is not supported")
-                    }
-                    Base::Binary => {
-                        self.err_span_(start, end, "binary float literal is not supported")
-                    }
-                    _ => {}
+                let base = match base {
+                    Base::Hexadecimal => Some("hexadecimal"),
+                    Base::Octal => Some("octal"),
+                    Base::Binary => Some("binary"),
+                    _ => None,
+                };
+                if let Some(base) = base {
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
                 }
                 (token::Float, self.symbol_from_to(start, end))
             }
@@ -644,54 +599,34 @@ impl<'a> StringReader<'a> {
     // identifier tokens.
     fn report_unknown_prefix(&self, start: BytePos) {
         let prefix_span = self.mk_sp(start, self.pos);
-        let prefix_str = self.str_from_to(start, self.pos);
-        let msg = format!("prefix `{}` is unknown", prefix_str);
+        let prefix = self.str_from_to(start, self.pos);
 
         let expn_data = prefix_span.ctxt().outer_expn_data();
 
         if expn_data.edition >= Edition::Edition2021 {
             // In Rust 2021, this is a hard error.
-            let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
-            err.span_label(prefix_span, "unknown prefix");
-            if prefix_str == "rb" {
-                err.span_suggestion_verbose(
-                    prefix_span,
-                    "use `br` for a raw byte string",
-                    "br",
-                    Applicability::MaybeIncorrect,
-                );
+            let sugg = if prefix == "rb" {
+                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
             } else if expn_data.is_root() {
-                err.span_suggestion_verbose(
-                    prefix_span.shrink_to_hi(),
-                    "consider inserting whitespace here",
-                    " ",
-                    Applicability::MaybeIncorrect,
-                );
-            }
-            err.note("prefixed identifiers and literals are reserved since Rust 2021");
-            err.emit();
+                Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+            } else {
+                None
+            };
+            self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
         } else {
             // Before Rust 2021, only emit a lint for migration.
             self.sess.buffer_lint_with_diagnostic(
                 &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                 prefix_span,
                 ast::CRATE_NODE_ID,
-                &msg,
+                &format!("prefix `{prefix}` is unknown"),
                 BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
             );
         }
     }
 
-    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
-        self.fatal_span_(
-            start,
-            self.pos,
-            &format!(
-                "too many `#` symbols: raw strings may be delimited \
-                by up to 255 `#` symbols, but found {}",
-                found
-            ),
-        )
+    fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+        self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
     fn cook_quoted(
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 6373f5b4fd6..0d12ec6081d 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -3,10 +3,12 @@
 use std::iter::once;
 use std::ops::Range;
 
-use rustc_errors::{pluralize, Applicability, Handler};
+use rustc_errors::{Applicability, Handler};
 use rustc_lexer::unescape::{EscapeError, Mode};
 use rustc_span::{BytePos, Span};
 
+use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
+
 pub(crate) fn emit_unescape_error(
     handler: &Handler,
     // interior part of the literal, without quotes
@@ -31,53 +33,32 @@ pub(crate) fn emit_unescape_error(
     };
     match error {
         EscapeError::LoneSurrogateUnicodeEscape => {
-            handler
-                .struct_span_err(span, "invalid unicode character escape")
-                .span_label(span, "invalid escape")
-                .help("unicode escape must not be a surrogate")
-                .emit();
+            handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
         }
         EscapeError::OutOfRangeUnicodeEscape => {
-            handler
-                .struct_span_err(span, "invalid unicode character escape")
-                .span_label(span, "invalid escape")
-                .help("unicode escape must be at most 10FFFF")
-                .emit();
+            handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
         }
         EscapeError::MoreThanOneChar => {
             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
+            let mut sugg = None;
+            let mut note = None;
 
-            let mut has_help = false;
-            let mut handler = handler.struct_span_err(
-                span_with_quotes,
-                "character literal may only contain one codepoint",
-            );
-
-            if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
-                let escaped_marks =
-                    lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
-                handler.span_note(
-                    span,
-                    &format!(
-                        "this `{}` is followed by the combining mark{} `{}`",
-                        lit.chars().next().unwrap(),
-                        pluralize!(escaped_marks.len()),
-                        escaped_marks.join(""),
-                    ),
-                );
+            let lit_chars = lit.chars().collect::<Vec<_>>();
+            let (first, rest) = lit_chars.split_first().unwrap();
+            if rest.iter().copied().all(is_combining_mark) {
                 let normalized = lit.nfc().to_string();
                 if normalized.chars().count() == 1 {
-                    has_help = true;
-                    handler.span_suggestion(
-                        span,
-                        &format!(
-                            "consider using the normalized form `{}` of this character",
-                            normalized.chars().next().unwrap().escape_default()
-                        ),
-                        normalized,
-                        Applicability::MachineApplicable,
-                    );
+                    let ch = normalized.chars().next().unwrap().escape_default().to_string();
+                    sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
                 }
+                let escaped_marks =
+                    rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
+                note = Some(MoreThanOneCharNote::AllCombining {
+                    span,
+                    chr: format!("{first}"),
+                    len: escaped_marks.len(),
+                    escaped_marks: escaped_marks.join(""),
+                });
             } else {
                 let printable: Vec<char> = lit
                     .chars()
@@ -87,32 +68,18 @@ pub(crate) fn emit_unescape_error(
                     })
                     .collect();
 
-                if let [ch] = printable.as_slice() {
-                    has_help = true;
-
-                    handler.span_note(
+                if let &[ch] = printable.as_slice() {
+                    sugg =
+                        Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
+                    note = Some(MoreThanOneCharNote::NonPrinting {
                         span,
-                        &format!(
-                            "there are non-printing characters, the full sequence is `{}`",
-                            lit.escape_default(),
-                        ),
-                    );
-
-                    handler.span_suggestion(
-                        span,
-                        "consider removing the non-printing characters",
-                        ch,
-                        Applicability::MaybeIncorrect,
-                    );
+                        escaped: lit.escape_default().to_string(),
+                    });
                 }
-            }
-
-            if !has_help {
-                let (prefix, msg) = if mode.is_byte() {
-                    ("b", "if you meant to write a byte string literal, use double quotes")
-                } else {
-                    ("", "if you meant to write a `str` literal, use double quotes")
-                };
+            };
+            let sugg = sugg.unwrap_or_else(|| {
+                let is_byte = mode.is_byte();
+                let prefix = if is_byte { "b" } else { "" };
                 let mut escaped = String::with_capacity(lit.len());
                 let mut chrs = lit.chars().peekable();
                 while let Some(first) = chrs.next() {
@@ -129,54 +96,32 @@ pub(crate) fn emit_unescape_error(
                         (c, _) => escaped.push(c),
                     };
                 }
-                handler.span_suggestion(
-                    span_with_quotes,
-                    msg,
-                    format!("{prefix}\"{escaped}\""),
-                    Applicability::MachineApplicable,
-                );
-            }
-
-            handler.emit();
+                let sugg = format!("{prefix}\"{escaped}\"");
+                MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+            });
+            handler.emit_err(UnescapeError::MoreThanOneChar {
+                span: span_with_quotes,
+                note,
+                suggestion: sugg,
+            });
         }
         EscapeError::EscapeOnlyChar => {
             let (c, char_span) = last_char();
-
-            let msg = if mode.is_byte() {
-                "byte constant must be escaped"
-            } else {
-                "character constant must be escaped"
-            };
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
-                .span_suggestion(
-                    char_span,
-                    "escape the character",
-                    c.escape_default(),
-                    Applicability::MachineApplicable,
-                )
-                .emit();
+            handler.emit_err(UnescapeError::EscapeOnlyChar {
+                span,
+                char_span,
+                escaped_sugg: c.escape_default().to_string(),
+                escaped_msg: escaped_char(c),
+                byte: mode.is_byte(),
+            });
         }
         EscapeError::BareCarriageReturn => {
-            let msg = if mode.in_double_quotes() {
-                "bare CR not allowed in string, use `\\r` instead"
-            } else {
-                "character constant must be escaped: `\\r`"
-            };
-            handler
-                .struct_span_err(span, msg)
-                .span_suggestion(
-                    span,
-                    "escape the character",
-                    "\\r",
-                    Applicability::MachineApplicable,
-                )
-                .emit();
+            let double_quotes = mode.in_double_quotes();
+            handler.emit_err(UnescapeError::BareCr { span, double_quotes });
         }
         EscapeError::BareCarriageReturnInRawString => {
             assert!(mode.in_double_quotes());
-            let msg = "bare CR not allowed in raw string";
-            handler.span_err(span, msg);
+            handler.emit_err(UnescapeError::BareCrRawString(span));
         }
         EscapeError::InvalidEscape => {
             let (c, span) = last_char();
@@ -213,22 +158,13 @@ pub(crate) fn emit_unescape_error(
             diag.emit();
         }
         EscapeError::TooShortHexEscape => {
-            handler.span_err(span, "numeric character escape is too short");
+            handler.emit_err(UnescapeError::TooShortHexEscape(span));
         }
         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
             let (c, span) = last_char();
-
-            let msg = if error == EscapeError::InvalidCharInHexEscape {
-                "invalid character in numeric character escape"
-            } else {
-                "invalid character in unicode escape"
-            };
-            let c = escaped_char(c);
-
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, c))
-                .span_label(span, msg)
-                .emit();
+            let is_hex = error == EscapeError::InvalidCharInHexEscape;
+            let ch = escaped_char(c);
+            handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
         }
         EscapeError::NonAsciiCharInByte => {
             let (c, span) = last_char();
@@ -278,41 +214,22 @@ pub(crate) fn emit_unescape_error(
             err.emit();
         }
         EscapeError::OutOfRangeHexEscape => {
-            handler
-                .struct_span_err(span, "out of range hex escape")
-                .span_label(span, "must be a character in the range [\\x00-\\x7f]")
-                .emit();
+            handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
         }
         EscapeError::LeadingUnderscoreUnicodeEscape => {
             let (c, span) = last_char();
-            let msg = "invalid start of unicode escape";
-            handler
-                .struct_span_err(span, &format!("{}: `{}`", msg, c))
-                .span_label(span, msg)
-                .emit();
+            handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
+                span,
+                ch: escaped_char(c),
+            });
         }
         EscapeError::OverlongUnicodeEscape => {
-            handler
-                .struct_span_err(span, "overlong unicode escape")
-                .span_label(span, "must have at most 6 hex digits")
-                .emit();
+            handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
         }
         EscapeError::UnclosedUnicodeEscape => {
-            handler
-                .struct_span_err(span, "unterminated unicode escape")
-                .span_label(span, "missing a closing `}`")
-                .span_suggestion_verbose(
-                    span.shrink_to_hi(),
-                    "terminate the unicode escape",
-                    "}",
-                    Applicability::MaybeIncorrect,
-                )
-                .emit();
+            handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
         }
         EscapeError::NoBraceInUnicodeEscape => {
-            let msg = "incorrect unicode escape sequence";
-            let mut diag = handler.struct_span_err(span, msg);
-
             let mut suggestion = "\\u{".to_owned();
             let mut suggestion_len = 0;
             let (c, char_span) = last_char();
@@ -322,54 +239,37 @@ pub(crate) fn emit_unescape_error(
                 suggestion_len += c.len_utf8();
             }
 
-            if suggestion_len > 0 {
+            let (label, sub) = if suggestion_len > 0 {
                 suggestion.push('}');
                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
-                diag.span_suggestion(
-                    span.with_hi(hi),
-                    "format of unicode escape sequences uses braces",
-                    suggestion,
-                    Applicability::MaybeIncorrect,
-                );
+                (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
             } else {
-                diag.span_label(span, msg);
-                diag.help("format of unicode escape sequences is `\\u{...}`");
-            }
-
-            diag.emit();
+                (Some(span), NoBraceUnicodeSub::Help)
+            };
+            handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
         }
         EscapeError::UnicodeEscapeInByte => {
-            let msg = "unicode escape in byte string";
-            handler
-                .struct_span_err(span, msg)
-                .span_label(span, msg)
-                .help("unicode escape sequences cannot be used as a byte or in a byte string")
-                .emit();
+            handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
         }
         EscapeError::EmptyUnicodeEscape => {
-            handler
-                .struct_span_err(span, "empty unicode escape")
-                .span_label(span, "this escape must have at least 1 hex digit")
-                .emit();
+            handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
         }
         EscapeError::ZeroChars => {
-            let msg = "empty character literal";
-            handler.struct_span_err(span, msg).span_label(span, msg).emit();
+            handler.emit_err(UnescapeError::ZeroChars(span));
         }
         EscapeError::LoneSlash => {
-            let msg = "invalid trailing slash in literal";
-            handler.struct_span_err(span, msg).span_label(span, msg).emit();
+            handler.emit_err(UnescapeError::LoneSlash(span));
         }
         EscapeError::UnskippedWhitespaceWarning => {
             let (c, char_span) = last_char();
-            let msg =
-                format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
-            handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
+            handler.emit_warning(UnescapeError::UnskippedWhitespace {
+                span,
+                ch: escaped_char(c),
+                char_span,
+            });
         }
         EscapeError::MultipleSkippedLinesWarning => {
-            let msg = "multiple lines skipped by escaped newline";
-            let bottom_msg = "skipping everything up to and including this point";
-            handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
+            handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
         }
     }
 }
diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs
index 34d003ccfa7..d4f971d5bc8 100644
--- a/compiler/rustc_parse/src/lexer/unicode_chars.rs
+++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -2,8 +2,10 @@
 //! <https://www.unicode.org/Public/security/10.0.0/confusables.txt>
 
 use super::StringReader;
-use crate::token::{self, Delimiter};
-use rustc_errors::{Applicability, Diagnostic};
+use crate::{
+    errors::TokenSubstitution,
+    token::{self, Delimiter},
+};
 use rustc_span::{symbol::kw, BytePos, Pos, Span};
 
 #[rustfmt::skip] // for line breaks
@@ -338,48 +340,44 @@ pub(super) fn check_for_substitution<'a>(
     reader: &StringReader<'a>,
     pos: BytePos,
     ch: char,
-    err: &mut Diagnostic,
     count: usize,
-) -> Option<token::TokenKind> {
-    let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
+) -> (Option<token::TokenKind>, Option<TokenSubstitution>) {
+    let Some(&(_, u_name, ascii_str)) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) else {
+        return (None, None);
+    };
 
     let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
 
     let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
         let msg = format!("substitution character not found for '{}'", ch);
         reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-        return None;
+        return (None, None);
     };
 
     // special help suggestion for "directed" double quotes
-    if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
-        let msg = format!(
-            "Unicode characters '“' (Left Double Quotation Mark) and \
-             '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
-            ascii_str, ascii_name
-        );
-        err.span_suggestion(
-            Span::with_root_ctxt(
-                pos,
-                pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
-            ),
-            &msg,
-            format!("\"{}\"", s),
-            Applicability::MaybeIncorrect,
+    let sugg = if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
+        let span = Span::with_root_ctxt(
+            pos,
+            pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
         );
+        Some(TokenSubstitution::DirectedQuotes {
+            span,
+            suggestion: format!("\"{s}\""),
+            ascii_str,
+            ascii_name,
+        })
     } else {
-        let msg = format!(
-            "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
-            ch, u_name, ascii_str, ascii_name
-        );
-        err.span_suggestion(
+        let suggestion = ascii_str.to_string().repeat(count);
+        Some(TokenSubstitution::Other {
             span,
-            &msg,
-            ascii_str.to_string().repeat(count),
-            Applicability::MaybeIncorrect,
-        );
-    }
-    token.clone()
+            suggestion,
+            ch: ch.to_string(),
+            u_name,
+            ascii_str,
+            ascii_name,
+        })
+    };
+    (token.clone(), sugg)
 }
 
 /// Extract string if found at current position with given delimiters