initial step towards implementing C string literals

author: Deadbeef <ent3rm4n@gmail.com> 2023-03-05 15:03:22 +0000
committer: Deadbeef <ent3rm4n@gmail.com> 2023-05-02 10:30:09 +0000
commit: 8ff3903643b530c9029e8f2c6c6956fda8f21d77 (patch)
tree: e424bc1501abc62c70ad0c622777a0c6cb4cd0e1 /compiler
parent: 7b99493492ad59c7a44c65373558175db42b4151 (diff)
download: rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.tar.gz
rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.zip
12 files changed, 299 insertions, 79 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index ea04ba4f66e..fb22e464064 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -1814,6 +1814,8 @@ pub enum LitKind {
     /// A byte string (`b"foo"`). Not stored as a symbol because it might be
     /// non-utf8, and symbols only allow utf8 strings.
     ByteStr(Lrc<[u8]>, StrStyle),
+    /// A C String (`c"foo"`).
+    CStr(Lrc<[u8]>, StrStyle),
     /// A byte char (`b'f'`).
     Byte(u8),
     /// A character literal (`'a'`).
@@ -1868,6 +1870,7 @@ impl LitKind {
             // unsuffixed variants
             LitKind::Str(..)
             | LitKind::ByteStr(..)
+            | LitKind::CStr(..)
             | LitKind::Byte(..)
             | LitKind::Char(..)
             | LitKind::Int(_, LitIntType::Unsuffixed)
diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs
index f947ae4d057..42b843482a3 100644
--- a/compiler/rustc_ast/src/token.rs
+++ b/compiler/rustc_ast/src/token.rs
@@ -74,6 +74,8 @@ pub enum LitKind {
     StrRaw(u8), // raw string delimited by `n` hash symbols
     ByteStr,
     ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
+    CStr,
+    CStrRaw(u8),
     Err,
 }
 
@@ -141,6 +143,10 @@ impl fmt::Display for Lit {
                 delim = "#".repeat(n as usize),
                 string = symbol
             )?,
+            CStr => write!(f, "c\"{symbol}\"")?,
+            CStrRaw(n) => {
+                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
+            }
             Integer | Float | Bool | Err => write!(f, "{symbol}")?,
         }
 
@@ -170,6 +176,7 @@ impl LitKind {
             Float => "float",
             Str | StrRaw(..) => "string",
             ByteStr | ByteStrRaw(..) => "byte string",
+            CStr | CStrRaw(..) => "C string",
             Err => "error",
         }
     }
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 74b842ac96e..8534011e189 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -2,7 +2,10 @@
 
 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
 use crate::token::{self, Token};
-use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
+use rustc_lexer::unescape::{
+    byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
+    Mode,
+};
 use rustc_span::symbol::{kw, sym, Symbol};
 use rustc_span::Span;
 use std::{ascii, fmt, str};
@@ -158,6 +161,52 @@ impl LitKind {
 
                 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
             }
+            token::CStr => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Cooked)
+            }
+            token::CStrRaw(n) => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Raw(n))
+            }
             token::Err => LitKind::Err,
         })
     }
@@ -191,6 +240,8 @@ impl fmt::Display for LitKind {
                     string = symbol
                 )?;
             }
+            // TODO need to reescape
+            LitKind::CStr(..) => todo!(),
             LitKind::Int(n, ty) => {
                 write!(f, "{n}")?;
                 match ty {
@@ -237,6 +288,8 @@ impl MetaItemLit {
             LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
             LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
             LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
+            LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
+            LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
             LitKind::Byte(_) => token::Byte,
             LitKind::Char(_) => token::Char,
             LitKind::Int(..) => token::Integer,
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index 849336c8669..535ac89e751 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String {
         token::ByteStrRaw(n) => {
             format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
         }
+        // TODO
+        token::CStr | token::CStrRaw(_) => todo!(),
         token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
     };
 
diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs
index b92964d03e9..50e88ae2eee 100644
--- a/compiler/rustc_builtin_macros/src/concat.rs
+++ b/compiler/rustc_builtin_macros/src/concat.rs
@@ -32,6 +32,10 @@ pub fn expand_concat(
                 Ok(ast::LitKind::Bool(b)) => {
                     accumulator.push_str(&b.to_string());
                 }
+                Ok(ast::LitKind::CStr(..)) => {
+                    cx.span_err(e.span, "cannot concatenate a C string literal");
+                    has_errors = true;
+                }
                 Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
                     cx.emit_err(errors::ConcatBytestr { span: e.span });
                     has_errors = true;
diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs
index ba639c0a9fe..ae674995e42 100644
--- a/compiler/rustc_builtin_macros/src/concat_bytes.rs
+++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs
@@ -18,6 +18,10 @@ fn invalid_type_err(
     };
     let snippet = cx.sess.source_map().span_to_snippet(span).ok();
     match ast::LitKind::from_token_lit(token_lit) {
+        Ok(ast::LitKind::CStr(_, _)) => {
+            // TODO
+            cx.span_err(span, "cannot concatenate C string litearls");
+        }
         Ok(ast::LitKind::Char(_)) => {
             let sugg =
                 snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs
index 1e7d07bc22d..04bdea273eb 100644
--- a/compiler/rustc_expand/src/proc_macro_server.rs
+++ b/compiler/rustc_expand/src/proc_macro_server.rs
@@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
             token::StrRaw(n) => LitKind::StrRaw(n),
             token::ByteStr => LitKind::ByteStr,
             token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
+            // TODO
+            token::CStr | token::CStrRaw(_) => todo!(),
             token::Err => LitKind::Err,
             token::Bool => unreachable!(),
         }
@@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
                 | token::LitKind::StrRaw(_)
                 | token::LitKind::ByteStr
                 | token::LitKind::ByteStrRaw(_)
+                | token::LitKind::CStr
+                | token::LitKind::CStrRaw(_)
                 | token::LitKind::Err => return Err(()),
                 token::LitKind::Integer | token::LitKind::Float => {}
             }
diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs
index e1c030d3e19..7ddafc9083a 100644
--- a/compiler/rustc_hir/src/lang_items.rs
+++ b/compiler/rustc_hir/src/lang_items.rs
@@ -332,6 +332,7 @@ language_item_table! {
     RangeTo,                 sym::RangeTo,             range_to_struct,            Target::Struct,         GenericRequirement::None;
 
     String,                  sym::String,              string,                     Target::Struct,         GenericRequirement::None;
+    CStr,                    sym::CStr,                c_str,                      Target::Struct,         GenericRequirement::None;
 }
 
 pub enum GenericRequirement {
diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
index f42c825d9e8..374266638d1 100644
--- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
+++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
@@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
                 opt_ty.unwrap_or_else(|| self.next_float_var())
             }
             ast::LitKind::Bool(_) => tcx.types.bool,
+            ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
+                tcx.lifetimes.re_static,
+                tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
+                    .skip_binder(),
+            ),
             ast::LitKind::Err => tcx.ty_error_misc(),
         }
     }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index b3f4b5cd5e5..95cb1f93e39 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -186,12 +186,16 @@ pub enum LiteralKind {
     Str { terminated: bool },
     /// "b"abc"", "b"abc"
     ByteStr { terminated: bool },
+    /// `c"abc"`, `c"abc`
+    CStr { terminated: bool },
     /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
     /// an invalid literal.
     RawStr { n_hashes: Option<u8> },
     /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
     /// indicates an invalid literal.
     RawByteStr { n_hashes: Option<u8> },
+    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid.
+    RawCStr { n_hashes: Option<u8> },
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -391,6 +395,32 @@ impl Cursor<'_> {
                 _ => self.ident_or_unknown_prefix(),
             },
 
+            // TODO deduplicate this code
+            // c-string literal, raw c-string literal or identifier.
+            'c' => match (self.first(), self.second()) {
+                ('"', _) => {
+                    self.bump();
+                    let terminated = self.double_quoted_string();
+                    let suffix_start = self.pos_within_token();
+                    if terminated {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = CStr { terminated };
+                    Literal { kind, suffix_start }
+                }
+                ('r', '"') | ('r', '#') => {
+                    self.bump();
+                    let res = self.raw_double_quoted_string(2);
+                    let suffix_start = self.pos_within_token();
+                    if res.is_ok() {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = RawCStr { n_hashes: res.ok() };
+                    Literal { kind, suffix_start }
+                }
+                _ => self.ident_or_unknown_prefix(),
+            },
+
             // Identifier (this should be checked after other variant that can
             // start as identifier).
             c if is_id_start(c) => self.ident_or_unknown_prefix(),
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index bb4d91247b8..4b707c9ec96 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -90,6 +90,39 @@ where
         Mode::RawStr | Mode::RawByteStr => {
             unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
         }
+        Mode::CStr | Mode::RawCStr => unreachable!(),
+    }
+}
+
+pub enum CStrUnit {
+    Byte(u8),
+    Char(char),
+}
+
+impl From<u8> for CStrUnit {
+    fn from(value: u8) -> Self {
+        CStrUnit::Byte(value)
+    }
+}
+
+impl From<char> for CStrUnit {
+    fn from(value: char) -> Self {
+        CStrUnit::Char(value)
+    }
+}
+
+pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>),
+{
+    if mode == Mode::RawCStr {
+        unescape_raw_str_or_raw_byte_str(
+            src,
+            mode.characters_should_be_ascii(),
+            &mut |r, result| callback(r, result.map(CStrUnit::Char)),
+        );
+    } else {
+        unescape_str_common(src, mode, callback);
     }
 }
 
@@ -114,19 +147,26 @@ pub enum Mode {
     ByteStr,
     RawStr,
     RawByteStr,
+    CStr,
+    RawCStr,
 }
 
 impl Mode {
     pub fn in_double_quotes(self) -> bool {
         match self {
-            Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
+            Mode::Str
+            | Mode::ByteStr
+            | Mode::RawStr
+            | Mode::RawByteStr
+            | Mode::CStr
+            | Mode::RawCStr => true,
             Mode::Char | Mode::Byte => false,
         }
     }
 
     pub fn is_byte(self) -> bool {
         match self {
-            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
             Mode::Char | Mode::Str | Mode::RawStr => false,
         }
     }
@@ -163,62 +203,63 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
             value as char
         }
 
-        'u' => {
-            // We've parsed '\u', now we have to parse '{..}'.
+        'u' => scan_unicode(chars, is_byte)?,
+        _ => return Err(EscapeError::InvalidEscape),
+    };
+    Ok(res)
+}
 
-            if chars.next() != Some('{') {
-                return Err(EscapeError::NoBraceInUnicodeEscape);
-            }
+fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
+    // We've parsed '\u', now we have to parse '{..}'.
 
-            // First character must be a hexadecimal digit.
-            let mut n_digits = 1;
-            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
-                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
-                '}' => return Err(EscapeError::EmptyUnicodeEscape),
-                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
-            };
-
-            // First character is valid, now parse the rest of the number
-            // and closing brace.
-            loop {
-                match chars.next() {
-                    None => return Err(EscapeError::UnclosedUnicodeEscape),
-                    Some('_') => continue,
-                    Some('}') => {
-                        if n_digits > 6 {
-                            return Err(EscapeError::OverlongUnicodeEscape);
-                        }
-
-                        // Incorrect syntax has higher priority for error reporting
-                        // than unallowed value for a literal.
-                        if is_byte {
-                            return Err(EscapeError::UnicodeEscapeInByte);
-                        }
-
-                        break std::char::from_u32(value).ok_or_else(|| {
-                            if value > 0x10FFFF {
-                                EscapeError::OutOfRangeUnicodeEscape
-                            } else {
-                                EscapeError::LoneSurrogateUnicodeEscape
-                            }
-                        })?;
-                    }
-                    Some(c) => {
-                        let digit: u32 =
-                            c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
-                        n_digits += 1;
-                        if n_digits > 6 {
-                            // Stop updating value since we're sure that it's incorrect already.
-                            continue;
-                        }
-                        value = value * 16 + digit;
+    if chars.next() != Some('{') {
+        return Err(EscapeError::NoBraceInUnicodeEscape);
+    }
+
+    // First character must be a hexadecimal digit.
+    let mut n_digits = 1;
+    let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
+        '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
+        '}' => return Err(EscapeError::EmptyUnicodeEscape),
+        c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
+    };
+
+    // First character is valid, now parse the rest of the number
+    // and closing brace.
+    loop {
+        match chars.next() {
+            None => return Err(EscapeError::UnclosedUnicodeEscape),
+            Some('_') => continue,
+            Some('}') => {
+                if n_digits > 6 {
+                    return Err(EscapeError::OverlongUnicodeEscape);
+                }
+
+                // Incorrect syntax has higher priority for error reporting
+                // than unallowed value for a literal.
+                if is_byte {
+                    return Err(EscapeError::UnicodeEscapeInByte);
+                }
+
+                break std::char::from_u32(value).ok_or_else(|| {
+                    if value > 0x10FFFF {
+                        EscapeError::OutOfRangeUnicodeEscape
+                    } else {
+                        EscapeError::LoneSurrogateUnicodeEscape
                     }
-                };
+                });
             }
-        }
-        _ => return Err(EscapeError::InvalidEscape),
-    };
-    Ok(res)
+            Some(c) => {
+                let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
+                n_digits += 1;
+                if n_digits > 6 {
+                    // Stop updating value since we're sure that it's incorrect already.
+                    continue;
+                }
+                value = value * 16 + digit;
+            }
+        };
+    }
 }
 
 #[inline]
@@ -266,7 +307,9 @@ where
                         // if unescaped '\' character is followed by '\n'.
                         // For details see [Rust language reference]
                         // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars, start, callback);
+                        skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
+                            callback(range, Err(err))
+                        });
                         continue;
                     }
                     _ => scan_escape(&mut chars, is_byte),
@@ -281,32 +324,32 @@ where
         let end = src.len() - chars.as_str().len();
         callback(start..end, res);
     }
+}
 
-    fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
-    where
-        F: FnMut(Range<usize>, Result<char, EscapeError>),
-    {
-        let tail = chars.as_str();
-        let first_non_space = tail
-            .bytes()
-            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-            .unwrap_or(tail.len());
-        if tail[1..first_non_space].contains('\n') {
-            // The +1 accounts for the escaping slash.
-            let end = start + first_non_space + 1;
-            callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
-        }
-        let tail = &tail[first_non_space..];
-        if let Some(c) = tail.chars().nth(0) {
-            if c.is_whitespace() {
-                // For error reporting, we would like the span to contain the character that was not
-                // skipped. The +1 is necessary to account for the leading \ that started the escape.
-                let end = start + first_non_space + c.len_utf8() + 1;
-                callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
-            }
+fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+where
+    F: FnMut(Range<usize>, EscapeError),
+{
+    let tail = chars.as_str();
+    let first_non_space = tail
+        .bytes()
+        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
+        .unwrap_or(tail.len());
+    if tail[1..first_non_space].contains('\n') {
+        // The +1 accounts for the escaping slash.
+        let end = start + first_non_space + 1;
+        callback(start..end, EscapeError::MultipleSkippedLinesWarning);
+    }
+    let tail = &tail[first_non_space..];
+    if let Some(c) = tail.chars().nth(0) {
+        if c.is_whitespace() {
+            // For error reporting, we would like the span to contain the character that was not
+            // skipped. The +1 is necessary to account for the leading \ that started the escape.
+            let end = start + first_non_space + c.len_utf8() + 1;
+            callback(start..end, EscapeError::UnskippedWhitespaceWarning);
         }
-        *chars = tail.chars();
     }
+    *chars = tail.chars();
 }
 
 /// Takes a contents of a string literal (without quotes) and produces a
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index a4a75fcb969..b2050780309 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -415,6 +415,16 @@ impl<'a> StringReader<'a> {
                 }
                 self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
             }
+            rustc_lexer::LiteralKind::CStr { terminated } => {
+                if !terminated {
+                    self.sess.span_diagnostic.span_fatal_with_code(
+                        self.mk_sp(start + BytePos(1), end),
+                        "unterminated C string",
+                        error_code!(E0767),
+                    )
+                }
+                self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+            }
             rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
@@ -433,6 +443,15 @@ impl<'a> StringReader<'a> {
                     self.report_raw_str_error(start, 2);
                 }
             }
+            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    let kind = token::CStrRaw(n_hashes);
+                    self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
+            }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
                     let span = self.mk_sp(start, end);
@@ -692,6 +711,51 @@ impl<'a> StringReader<'a> {
             (token::Err, self.symbol_from_to(start, end))
         }
     }
+
+    fn cook_c_string(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        let mut has_fatal_err = false;
+        let content_start = start + BytePos(prefix_len);
+        let content_end = end - BytePos(postfix_len);
+        let lit_content = self.str_from_to(content_start, content_end);
+        unescape::unescape_c_string(lit_content, mode, &mut |range, result| {
+            // Here we only check for errors. The actual unescaping is done later.
+            if let Err(err) = result {
+                let span_with_quotes = self.mk_sp(start, end);
+                let (start, end) = (range.start as u32, range.end as u32);
+                let lo = content_start + BytePos(start);
+                let hi = lo + BytePos(end - start);
+                let span = self.mk_sp(lo, hi);
+                if err.is_fatal() {
+                    has_fatal_err = true;
+                }
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit_content,
+                    span_with_quotes,
+                    span,
+                    mode,
+                    range,
+                    err,
+                );
+            }
+        });
+
+        // We normally exclude the quotes for the symbol, but for errors we
+        // include it because it results in clearer error messages.
+        if !has_fatal_err {
+            (kind, Symbol::intern(lit_content))
+        } else {
+            (token::Err, self.symbol_from_to(start, end))
+        }
+    }
 }
 
 pub fn nfc_normalize(string: &str) -> Symbol {
author	Deadbeef <ent3rm4n@gmail.com>	2023-03-05 15:03:22 +0000
committer	Deadbeef <ent3rm4n@gmail.com>	2023-05-02 10:30:09 +0000
commit	8ff3903643b530c9029e8f2c6c6956fda8f21d77 (patch)
tree	e424bc1501abc62c70ad0c622777a0c6cb4cd0e1 /compiler
parent	7b99493492ad59c7a44c65373558175db42b4151 (diff)
download	rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.tar.gz rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.zip