about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 4
-rw-r--r-- src/libsyntax/parse/literal.rs 26
-rw-r--r-- src/libsyntax/parse/unescape.rs 64
3 files changed, 69 insertions, 25 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 7f190bd7410..24dddcb6141 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -1348,7 +1348,7 @@ impl<'a> StringReader<'a> {
 
     fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
         self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| {
+            unescape::unescape_raw_str(lit, &mut |range, c| {
                 if let Err(err) = c {
                     emit_unescape_error(
                         &self.sess.span_diagnostic,
@@ -1365,7 +1365,7 @@ impl<'a> StringReader<'a> {
 
     fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
         self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| {
+            unescape::unescape_raw_byte_str(lit, &mut |range, c| {
                 if let Err(err) = c {
                     emit_unescape_error(
                         &self.sess.span_diagnostic,
diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs
index 3711512d64e..3baa5858c9d 100644
--- a/src/libsyntax/parse/literal.rs
+++ b/src/libsyntax/parse/literal.rs
@@ -4,8 +4,9 @@ use crate::ast::{self, Lit, LitKind};
 use crate::parse::parser::Parser;
 use crate::parse::PResult;
 use crate::parse::token::{self, Token, TokenKind};
-use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str};
 use crate::parse::unescape::{unescape_char, unescape_byte};
+use crate::parse::unescape::{unescape_str, unescape_byte_str};
+use crate::parse::unescape::{unescape_raw_str, unescape_raw_byte_str};
 use crate::print::pprust;
 use crate::symbol::{kw, sym, Symbol};
 use crate::tokenstream::{TokenStream, TokenTree};
@@ -144,7 +145,7 @@ impl LitKind {
                 let symbol = if s.contains('\r') {
                     let mut buf = String::with_capacity(s.len());
                     let mut error = Ok(());
-                    unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| {
+                    unescape_raw_str(&s, &mut |_, unescaped_char| {
                         match unescaped_char {
                             Ok(c) => buf.push(c),
                             Err(_) => error = Err(LitError::LexerError),
@@ -172,7 +173,26 @@ impl LitKind {
                 buf.shrink_to_fit();
                 LitKind::ByteStr(Lrc::new(buf))
             }
-            token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())),
+            token::ByteStrRaw(_) => {
+                let s = symbol.as_str();
+                let bytes = if s.contains('\r') {
+                    let mut buf = Vec::with_capacity(s.len());
+                    let mut error = Ok(());
+                    unescape_raw_byte_str(&s, &mut |_, unescaped_byte| {
+                        match unescaped_byte {
+                            Ok(c) => buf.push(c),
+                            Err(_) => error = Err(LitError::LexerError),
+                        }
+                    });
+                    error?;
+                    buf.shrink_to_fit();
+                    buf
+                } else {
+                    symbol.to_string().into_bytes()
+                };
+
+                LitKind::ByteStr(Lrc::new(bytes))
+            },
             token::Err => LitKind::Err(symbol),
         })
     }
diff --git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs
index f5b6c38083e..da6de8a12da 100644
--- a/src/libsyntax/parse/unescape.rs
+++ b/src/libsyntax/parse/unescape.rs
@@ -71,29 +71,24 @@ where
 /// sequence of characters or errors.
 /// NOTE: Raw strings do not perform any explicit character escaping, here we
 /// only translate CRLF to LF and produce errors on bare CR.
-pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
+pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
 where
     F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
-    let mut byte_offset: usize = 0;
+    unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
+}
 
-    let mut chars = literal_text.chars().peekable();
-    while let Some(curr) = chars.next() {
-        let (result, scanned) = match (curr, chars.peek()) {
-            ('\r', Some('\n')) => {
-                chars.next();
-                (Ok('\n'), [Some('\r'), Some('\n')])
-            },
-            ('\r', _) =>
-                (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
-            (c, _) if mode.is_bytes() && c > '\x7F' =>
-                (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
-            (c, _) => (Ok(c), [Some(c), None]),
-        };
-        let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
-        callback(byte_offset..(byte_offset + len_utf8), result);
-        byte_offset += len_utf8;
-    }
+/// Takes the contents of a raw byte string literal (without quotes) and produces a
+/// sequence of bytes or errors.
+/// NOTE: Raw byte strings do not perform any explicit character escaping; here we only
+/// translate CRLF to LF and produce errors on bare CR and on non-ASCII characters.
+pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<u8, EscapeError>),
+{
+    unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
+        callback(range, char.map(byte_from_char))
+    })
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -284,9 +279,38 @@ where
     }
 }
 
+/// Takes the contents of a raw string or raw byte string literal (without quotes)
+/// and produces a sequence of characters or errors.
+/// NOTE: Raw strings do not perform any explicit character escaping; here we only
+/// translate CRLF to LF and produce errors on bare CR (and on non-ASCII in byte mode).
+fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<char, EscapeError>),
+{
+    let mut byte_offset: usize = 0;
+
+    let mut chars = literal_text.chars().peekable();
+    while let Some(curr) = chars.next() {
+        let (result, scanned) = match (curr, chars.peek()) {
+            ('\r', Some('\n')) => {
+                chars.next();
+                (Ok('\n'), [Some('\r'), Some('\n')])
+            },
+            ('\r', _) =>
+                (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
+            (c, _) if mode.is_bytes() && !c.is_ascii() =>
+                (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
+            (c, _) => (Ok(c), [Some(c), None]),
+        };
+        let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
+        callback(byte_offset..(byte_offset + len_utf8), result);
+        byte_offset += len_utf8;
+    }
+}
+
 fn byte_from_char(c: char) -> u8 {
     let res = c as u32;
-    assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte");
+    assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte(Str)");
     res as u8
 }