diff options
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 4 | ||||
| -rw-r--r-- | src/libsyntax/parse/literal.rs | 26 | ||||
| -rw-r--r-- | src/libsyntax/parse/unescape.rs | 64 |
3 files changed, 69 insertions, 25 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 7f190bd7410..24dddcb6141 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1348,7 +1348,7 @@ impl<'a> StringReader<'a> { fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| { + unescape::unescape_raw_str(lit, &mut |range, c| { if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, @@ -1365,7 +1365,7 @@ impl<'a> StringReader<'a> { fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| { + unescape::unescape_raw_byte_str(lit, &mut |range, c| { if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 3711512d64e..3baa5858c9d 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -4,8 +4,9 @@ use crate::ast::{self, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; use crate::parse::token::{self, Token, TokenKind}; -use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str}; use crate::parse::unescape::{unescape_char, unescape_byte}; +use crate::parse::unescape::{unescape_str, unescape_byte_str}; +use crate::parse::unescape::{unescape_raw_str, unescape_raw_byte_str}; use crate::print::pprust; use crate::symbol::{kw, sym, Symbol}; use crate::tokenstream::{TokenStream, TokenTree}; @@ -144,7 +145,7 @@ impl LitKind { let symbol = if s.contains('\r') { let mut buf = String::with_capacity(s.len()); let mut error = Ok(()); - unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| { + unescape_raw_str(&s, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), Err(_) => error = Err(LitError::LexerError), @@ -172,7 +173,26 @@ impl LitKind { buf.shrink_to_fit(); LitKind::ByteStr(Lrc::new(buf)) } - token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())), + token::ByteStrRaw(_) => { + let s = symbol.as_str(); + let bytes = if s.contains('\r') { + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_raw_byte_str(&s, &mut |_, unescaped_byte| { + match unescaped_byte { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + buf.shrink_to_fit(); + buf + } else { + symbol.to_string().into_bytes() + }; + + LitKind::ByteStr(Lrc::new(bytes)) + }, token::Err => LitKind::Err(symbol), }) } diff --git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs index f5b6c38083e..da6de8a12da 100644 --- a/src/libsyntax/parse/unescape.rs +++ b/src/libsyntax/parse/unescape.rs @@ -71,29 +71,24 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F) +pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F) where F: FnMut(Range<usize>, Result<char, EscapeError>), { - let mut byte_offset: usize = 0; + unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback) +} - let mut chars = literal_text.chars().peekable(); - while let Some(curr) = chars.next() { - let (result, scanned) = match (curr, chars.peek()) { - ('\r', Some('\n')) => { - chars.next(); - (Ok('\n'), [Some('\r'), Some('\n')]) - }, - ('\r', _) => - (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]), - (c, _) if mode.is_bytes() && c > '\x7F' => - (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]), - (c, _) => (Ok(c), [Some(c), None]), - }; - let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum(); - callback(byte_offset..(byte_offset + len_utf8), result); - byte_offset += len_utf8; - } +/// Takes a contents of a string literal (without quotes) and produces a +/// sequence of characters or errors. +/// NOTE: Raw strings do not perform any explicit character escaping, here we +/// only translate CRLF to LF and produce errors on bare CR. +pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F) +where + F: FnMut(Range<usize>, Result<u8, EscapeError>), +{ + unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| { + callback(range, char.map(byte_from_char)) + }) } #[derive(Debug, Clone, Copy)] @@ -284,9 +279,38 @@ where } } +/// Takes a contents of a string literal (without quotes) and produces a +/// sequence of characters or errors. +/// NOTE: Raw strings do not perform any explicit character escaping, here we +/// only translate CRLF to LF and produce errors on bare CR. +fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F) +where + F: FnMut(Range<usize>, Result<char, EscapeError>), +{ + let mut byte_offset: usize = 0; + + let mut chars = literal_text.chars().peekable(); + while let Some(curr) = chars.next() { + let (result, scanned) = match (curr, chars.peek()) { + ('\r', Some('\n')) => { + chars.next(); + (Ok('\n'), [Some('\r'), Some('\n')]) + }, + ('\r', _) => + (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]), + (c, _) if mode.is_bytes() && !c.is_ascii() => + (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]), + (c, _) => (Ok(c), [Some(c), None]), + }; + let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum(); + callback(byte_offset..(byte_offset + len_utf8), result); + byte_offset += len_utf8; + } +} + fn byte_from_char(c: char) -> u8 { let res = c as u32; - assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte"); + assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte(Str)"); res as u8 } |
