about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- compiler/rustc_ast/src/lib.rs 1
-rw-r--r-- compiler/rustc_ast/src/util/literal.rs 39
-rw-r--r-- compiler/rustc_lexer/src/unescape.rs 45
3 files changed, 45 insertions, 40 deletions
diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs
index 84fe9ad2672..21183121e15 100644
--- a/compiler/rustc_ast/src/lib.rs
+++ b/compiler/rustc_ast/src/lib.rs
@@ -16,6 +16,7 @@
 #![feature(min_specialization)]
 #![recursion_limit = "256"]
 #![feature(slice_internals)]
+#![feature(stmt_expr_attributes)]
 
 #[macro_use]
 extern crate rustc_macros;
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 1cc5ddfd8ee..224afbd553f 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -56,25 +56,30 @@ impl LitKind {
                 // new symbol because the string in the LitKind is different to the
                 // string in the token.
                 let s = symbol.as_str();
-                let symbol =
-                    if s.contains(&['\\', '\r']) {
-                        let mut buf = String::with_capacity(s.len());
-                        let mut error = Ok(());
-                        unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
-                            match unescaped_char {
-                                Ok(c) => buf.push(c),
-                                Err(err) => {
-                                    if err.is_fatal() {
-                                        error = Err(LitError::LexerError);
-                                    }
+                let symbol = if s.contains(&['\\', '\r']) {
+                    let mut buf = String::with_capacity(s.len());
+                    let mut error = Ok(());
+                    // Force-inlining here is aggressive but the closure is
+                    // called on every char in the string, so it can be
+                    // hot in programs with many long strings.
+                    unescape_literal(
+                        &s,
+                        Mode::Str,
+                        &mut #[inline(always)]
+                        |_, unescaped_char| match unescaped_char {
+                            Ok(c) => buf.push(c),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Err(LitError::LexerError);
                                 }
                             }
-                        });
-                        error?;
-                        Symbol::intern(&buf)
-                    } else {
-                        symbol
-                    };
+                        },
+                    );
+                    error?;
+                    Symbol::intern(&buf)
+                } else {
+                    symbol
+                };
                 LitKind::Str(symbol, ast::StrStyle::Cooked)
             }
             token::StrRaw(n) => {
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index d789237e692..97f9588ae1e 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -159,26 +159,8 @@ impl Mode {
     }
 }
 
-fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
-    if first_char != '\\' {
-        // Previous character was not a slash, and we don't expect it to be
-        // an escape-only character.
-        return match first_char {
-            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
-            '\r' => Err(EscapeError::BareCarriageReturn),
-            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
-            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
-            _ => {
-                if mode.is_bytes() && !first_char.is_ascii() {
-                    // Byte literal can't be a non-ascii character.
-                    return Err(EscapeError::NonAsciiCharInByte);
-                }
-                Ok(first_char)
-            }
-        };
-    }
-
-    // Previous character is '\\', try to unescape it.
+fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
+    // Previous character was '\\', unescape what follows.
 
     let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
 
@@ -270,9 +252,24 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
     Ok(res)
 }
 
+#[inline]
+fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
+    if mode.is_bytes() && !first_char.is_ascii() {
+        // Byte literal can't be a non-ascii character.
+        Err(EscapeError::NonAsciiCharInByte)
+    } else {
+        Ok(first_char)
+    }
+}
+
 fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
     let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
-    let res = scan_escape(first_char, chars, mode)?;
+    let res = match first_char {
+        '\\' => scan_escape(chars, mode),
+        '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
+        '\r' => Err(EscapeError::BareCarriageReturn),
+        _ => ascii_check(first_char, mode),
+    }?;
     if chars.next().is_some() {
         return Err(EscapeError::MoreThanOneChar);
     }
@@ -303,12 +300,14 @@ where
                         skip_ascii_whitespace(&mut chars, start, callback);
                         continue;
                     }
-                    _ => scan_escape(first_char, &mut chars, mode),
+                    _ => scan_escape(&mut chars, mode),
                 }
             }
             '\n' => Ok('\n'),
             '\t' => Ok('\t'),
-            _ => scan_escape(first_char, &mut chars, mode),
+            '"' => Err(EscapeError::EscapeOnlyChar),
+            '\r' => Err(EscapeError::BareCarriageReturn),
+            _ => ascii_check(first_char, mode),
         };
         let end = initial_len - chars.as_str().len();
         callback(start..end, unescaped_char);