diff options
| author | Deadbeef <ent3rm4n@gmail.com> | 2023-03-05 15:03:22 +0000 |
|---|---|---|
| committer | Deadbeef <ent3rm4n@gmail.com> | 2023-05-02 10:30:09 +0000 |
| commit | 8ff3903643b530c9029e8f2c6c6956fda8f21d77 (patch) | |
| tree | e424bc1501abc62c70ad0c622777a0c6cb4cd0e1 /compiler | |
| parent | 7b99493492ad59c7a44c65373558175db42b4151 (diff) | |
| download | rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.tar.gz rust-8ff3903643b530c9029e8f2c6c6956fda8f21d77.zip | |
initial step towards implementing C string literals
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast/src/ast.rs | 3 | ||||
| -rw-r--r-- | compiler/rustc_ast/src/token.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_ast/src/util/literal.rs | 55 | ||||
| -rw-r--r-- | compiler/rustc_ast_pretty/src/pprust/state.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/concat.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/concat_bytes.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_expand/src/proc_macro_server.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_hir/src/lang_items.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 30 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/unescape.rs | 199 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 64 |
12 files changed, 299 insertions, 79 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index ea04ba4f66e..fb22e464064 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1814,6 +1814,8 @@ pub enum LitKind { /// A byte string (`b"foo"`). Not stored as a symbol because it might be /// non-utf8, and symbols only allow utf8 strings. ByteStr(Lrc<[u8]>, StrStyle), + /// A C String (`c"foo"`). + CStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1868,6 +1870,7 @@ impl LitKind { // unsuffixed variants LitKind::Str(..) | LitKind::ByteStr(..) + | LitKind::CStr(..) | LitKind::Byte(..) | LitKind::Char(..) | LitKind::Int(_, LitIntType::Unsuffixed) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f947ae4d057..42b843482a3 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -74,6 +74,8 @@ pub enum LitKind { StrRaw(u8), // raw string delimited by `n` hash symbols ByteStr, ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), Err, } @@ -141,6 +143,10 @@ impl fmt::Display for Lit { delim = "#".repeat(n as usize), string = symbol )?, + CStr => write!(f, "c\"{symbol}\"")?, + CStrRaw(n) => { + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))? + } Integer | Float | Bool | Err => write!(f, "{symbol}")?, } @@ -170,6 +176,7 @@ impl LitKind { Float => "float", Str | StrRaw(..) => "string", ByteStr | ByteStrRaw(..) => "byte string", + CStr | CStrRaw(..) => "C string", Err => "error", } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 74b842ac96e..8534011e189 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,10 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; -use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; +use rustc_lexer::unescape::{ + byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, + Mode, +}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; use std::{ascii, fmt, str}; @@ -158,6 +161,52 @@ impl LitKind { LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } + token::CStr => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::CStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(b'\0'); + LitKind::CStr(buf.into(), StrStyle::Cooked) + } + token::CStrRaw(n) => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(b'\0'); + LitKind::CStr(buf.into(), StrStyle::Raw(n)) + } token::Err => LitKind::Err, }) } @@ -191,6 +240,8 @@ impl fmt::Display for LitKind { string = symbol )?; } + // TODO need to reescape + LitKind::CStr(..) => todo!(), LitKind::Int(n, ty) => { write!(f, "{n}")?; match ty { @@ -237,6 +288,8 @@ impl MetaItemLit { LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n), LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr, LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n), + LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr, + LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n), LitKind::Byte(_) => token::Byte, LitKind::Char(_) => token::Char, LitKind::Int(..) => token::Integer, diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index 849336c8669..535ac89e751 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String { token::ByteStrRaw(n) => { format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol) } + // TODO + token::CStr | token::CStrRaw(_) => todo!(), token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(), }; diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index b92964d03e9..50e88ae2eee 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -32,6 +32,10 @@ pub fn expand_concat( Ok(ast::LitKind::Bool(b)) => { accumulator.push_str(&b.to_string()); } + Ok(ast::LitKind::CStr(..)) => { + cx.span_err(e.span, "cannot concatenate a C string literal"); + has_errors = true; + } Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); has_errors = true; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index ba639c0a9fe..ae674995e42 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -18,6 +18,10 @@ fn invalid_type_err( }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); match ast::LitKind::from_token_lit(token_lit) { + Ok(ast::LitKind::CStr(_, _)) => { + // TODO + cx.span_err(span, "cannot concatenate C string litearls"); + } Ok(ast::LitKind::Char(_)) => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet }); diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 1e7d07bc22d..04bdea273eb 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind { token::StrRaw(n) => LitKind::StrRaw(n), token::ByteStr => LitKind::ByteStr, token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + // TODO + token::CStr | token::CStrRaw(_) => todo!(), token::Err => LitKind::Err, token::Bool => unreachable!(), } @@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> { | token::LitKind::StrRaw(_) | token::LitKind::ByteStr | token::LitKind::ByteStrRaw(_) + | token::LitKind::CStr + | token::LitKind::CStrRaw(_) | token::LitKind::Err => return Err(()), token::LitKind::Integer | token::LitKind::Float => {} } diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index e1c030d3e19..7ddafc9083a 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -332,6 +332,7 @@ language_item_table! { RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None; String, sym::String, string, Target::Struct, GenericRequirement::None; + CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None; } pub enum GenericRequirement { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index f42c825d9e8..374266638d1 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { opt_ty.unwrap_or_else(|| self.next_float_var()) } ast::LitKind::Bool(_) => tcx.types.bool, + ast::LitKind::CStr(_, _) => tcx.mk_imm_ref( + tcx.lifetimes.re_static, + tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span))) + .skip_binder(), + ), ast::LitKind::Err => tcx.ty_error_misc(), } } diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b3f4b5cd5e5..95cb1f93e39 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -186,12 +186,16 @@ pub enum LiteralKind { Str { terminated: bool }, /// "b"abc"", "b"abc" ByteStr { terminated: bool }, + /// `c"abc"`, `c"abc` + CStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates /// an invalid literal. RawStr { n_hashes: Option<u8> }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option<u8> }, + /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid. + RawCStr { n_hashes: Option<u8> }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -391,6 +395,32 @@ impl Cursor<'_> { _ => self.ident_or_unknown_prefix(), }, + // TODO deduplicate this code + // c-string literal, raw c-string literal or identifier. + 'c' => match (self.first(), self.second()) { + ('"', _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = CStr { terminated }; + Literal { kind, suffix_start } + } + ('r', '"') | ('r', '#') => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = RawCStr { n_hashes: res.ok() }; + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + }, + // Identifier (this should be checked after other variant that can // start as identifier). c if is_id_start(c) => self.ident_or_unknown_prefix(), diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index bb4d91247b8..4b707c9ec96 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -90,6 +90,39 @@ where Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } + Mode::CStr | Mode::RawCStr => unreachable!(), + } +} + +pub enum CStrUnit { + Byte(u8), + Char(char), +} + +impl From<u8> for CStrUnit { + fn from(value: u8) -> Self { + CStrUnit::Byte(value) + } +} + +impl From<char> for CStrUnit { + fn from(value: char) -> Self { + CStrUnit::Char(value) + } +} + +pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F) +where + F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>), +{ + if mode == Mode::RawCStr { + unescape_raw_str_or_raw_byte_str( + src, + mode.characters_should_be_ascii(), + &mut |r, result| callback(r, result.map(CStrUnit::Char)), + ); + } else { + unescape_str_common(src, mode, callback); } } @@ -114,19 +147,26 @@ pub enum Mode { ByteStr, RawStr, RawByteStr, + CStr, + RawCStr, } impl Mode { pub fn in_double_quotes(self) -> bool { match self { - Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true, + Mode::Str + | Mode::ByteStr + | Mode::RawStr + | Mode::RawByteStr + | Mode::CStr + | Mode::RawCStr => true, Mode::Char | Mode::Byte => false, } } pub fn is_byte(self) -> bool { match self { - Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, + Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true, Mode::Char | Mode::Str | Mode::RawStr => false, } } @@ -163,62 +203,63 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError value as char } - 'u' => { - // We've parsed '\u', now we have to parse '{..}'. + 'u' => scan_unicode(chars, is_byte)?, + _ => return Err(EscapeError::InvalidEscape), + }; + Ok(res) +} - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } +fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> { + // We've parsed '\u', now we have to parse '{..}'. - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if is_byte { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or_else(|| { - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - })?; - } - Some(c) => { - let digit: u32 = - c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; + if chars.next() != Some('{') { + return Err(EscapeError::NoBraceInUnicodeEscape); + } + + // First character must be a hexadecimal digit. + let mut n_digits = 1; + let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { + '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), + '}' => return Err(EscapeError::EmptyUnicodeEscape), + c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, + }; + + // First character is valid, now parse the rest of the number + // and closing brace. + loop { + match chars.next() { + None => return Err(EscapeError::UnclosedUnicodeEscape), + Some('_') => continue, + Some('}') => { + if n_digits > 6 { + return Err(EscapeError::OverlongUnicodeEscape); + } + + // Incorrect syntax has higher priority for error reporting + // than unallowed value for a literal. + if is_byte { + return Err(EscapeError::UnicodeEscapeInByte); + } + + break std::char::from_u32(value).ok_or_else(|| { + if value > 0x10FFFF { + EscapeError::OutOfRangeUnicodeEscape + } else { + EscapeError::LoneSurrogateUnicodeEscape } - }; + }); } - } - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(res) + Some(c) => { + let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; + n_digits += 1; + if n_digits > 6 { + // Stop updating value since we're sure that it's incorrect already. + continue; + } + value = value * 16 + digit; + } + }; + } } #[inline] @@ -266,7 +307,9 @@ where // if unescaped '\' character is followed by '\n'. // For details see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, callback); + skip_ascii_whitespace(&mut chars, start, &mut |range, err| { + callback(range, Err(err)) + }); continue; } _ => scan_escape(&mut chars, is_byte), @@ -281,32 +324,32 @@ where let end = src.len() - chars.as_str().len(); callback(start..end, res); } +} - fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) - where - F: FnMut(Range<usize>, Result<char, EscapeError>), - { - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().nth(0) { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning)); - } +fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) +where + F: FnMut(Range<usize>, EscapeError), +{ + let tail = chars.as_str(); + let first_non_space = tail + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(tail.len()); + if tail[1..first_non_space].contains('\n') { + // The +1 accounts for the escaping slash. + let end = start + first_non_space + 1; + callback(start..end, EscapeError::MultipleSkippedLinesWarning); + } + let tail = &tail[first_non_space..]; + if let Some(c) = tail.chars().nth(0) { + if c.is_whitespace() { + // For error reporting, we would like the span to contain the character that was not + // skipped. The +1 is necessary to account for the leading \ that started the escape. + let end = start + first_non_space + c.len_utf8() + 1; + callback(start..end, EscapeError::UnskippedWhitespaceWarning); } - *chars = tail.chars(); } + *chars = tail.chars(); } /// Takes a contents of a string literal (without quotes) and produces a diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index a4a75fcb969..b2050780309 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -415,6 +415,16 @@ impl<'a> StringReader<'a> { } self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } + rustc_lexer::LiteralKind::CStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), end), + "unterminated C string", + error_code!(E0767), + ) + } + self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); @@ -433,6 +443,15 @@ impl<'a> StringReader<'a> { self.report_raw_str_error(start, 2); } } + rustc_lexer::LiteralKind::RawCStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + let kind = token::CStrRaw(n_hashes); + self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + } else { + self.report_raw_str_error(start, 2); + } + } rustc_lexer::LiteralKind::Int { base, empty_int } => { if empty_int { let span = self.mk_sp(start, end); @@ -692,6 +711,51 @@ impl<'a> StringReader<'a> { (token::Err, self.symbol_from_to(start, end)) } } + + fn cook_c_string( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + let mut has_fatal_err = false; + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); + let lit_content = self.str_from_to(content_start, content_end); + unescape::unescape_c_string(lit_content, mode, &mut |range, result| { + // Here we only check for errors. The actual unescaping is done later. + if let Err(err) = result { + let span_with_quotes = self.mk_sp(start, end); + let (start, end) = (range.start as u32, range.end as u32); + let lo = content_start + BytePos(start); + let hi = lo + BytePos(end - start); + let span = self.mk_sp(lo, hi); + if err.is_fatal() { + has_fatal_err = true; + } + emit_unescape_error( + &self.sess.span_diagnostic, + lit_content, + span_with_quotes, + span, + mode, + range, + err, + ); + } + }); + + // We normally exclude the quotes for the symbol, but for errors we + // include it because it results in clearer error messages. + if !has_fatal_err { + (kind, Symbol::intern(lit_content)) + } else { + (token::Err, self.symbol_from_to(start, end)) + } + } } pub fn nfc_normalize(string: &str) -> Symbol { |
