about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
author Igor Matuszewski <Xanewok@gmail.com> 2019-06-09 00:33:21 +0200
committer Igor Matuszewski <Xanewok@gmail.com> 2019-06-09 00:33:21 +0200
commit d4632744fa0fb18a7c3f5058f1e8157c760353b4 (patch)
tree 82e61672f38cc072d6c4454c38e2746b6eae95f1 /src/libsyntax/parse
parent 49d62e8d5a9df16e8ed6c703031fb72d264e3469 (diff)
download rust-d4632744fa0fb18a7c3f5058f1e8157c760353b4.tar.gz
download rust-d4632744fa0fb18a7c3f5058f1e8157c760353b4.zip
Translate CRLF -> LF in raw (byte) strings
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 4
-rw-r--r-- src/libsyntax/parse/literal.rs 4
-rw-r--r-- src/libsyntax/parse/unescape.rs 44
3 files changed, 18 insertions, 34 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 685c17d104b..71fa4bdb2cf 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -1346,7 +1346,7 @@ impl<'a> StringReader<'a> {
 
     fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
         self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_str(lit, &mut |range, c| {
+            unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| {
                 if let Err(err) = c {
                     emit_unescape_error(
                         &self.sess.span_diagnostic,
@@ -1363,7 +1363,7 @@ impl<'a> StringReader<'a> {
 
     fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
         self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_byte_str(lit, &mut |range, c| {
+            unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| {
                 if let Err(err) = c {
                     emit_unescape_error(
                         &self.sess.span_diagnostic,
diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs
index 3a2d905585c..3711512d64e 100644
--- a/src/libsyntax/parse/literal.rs
+++ b/src/libsyntax/parse/literal.rs
@@ -4,7 +4,7 @@ use crate::ast::{self, Lit, LitKind};
 use crate::parse::parser::Parser;
 use crate::parse::PResult;
 use crate::parse::token::{self, Token, TokenKind};
-use crate::parse::unescape::{unescape_str, unescape_byte_str, unescape_raw_str};
+use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str};
 use crate::parse::unescape::{unescape_char, unescape_byte};
 use crate::print::pprust;
 use crate::symbol::{kw, sym, Symbol};
@@ -144,7 +144,7 @@ impl LitKind {
                 let symbol = if s.contains('\r') {
                     let mut buf = String::with_capacity(s.len());
                     let mut error = Ok(());
-                    unescape_raw_str(&s, &mut |_, unescaped_char| {
+                    unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| {
                         match unescaped_char {
                             Ok(c) => buf.push(c),
                             Err(_) => error = Err(LitError::LexerError),
diff --git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs
index 819463b5472..f5b6c38083e 100644
--- a/src/libsyntax/parse/unescape.rs
+++ b/src/libsyntax/parse/unescape.rs
@@ -71,7 +71,7 @@ where
 /// sequence of characters or errors.
 /// NOTE: Raw strings do not perform any explicit character escaping, here we
 /// only translate CRLF to LF and produce errors on bare CR.
-pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
+pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
 where
     F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
@@ -79,36 +79,20 @@ where
 
     let mut chars = literal_text.chars().peekable();
     while let Some(curr) = chars.next() {
-        let result = match (curr, chars.peek()) {
-            ('\r', Some('\n')) => Ok(curr),
-            ('\r', _) => Err(EscapeError::BareCarriageReturn),
-            _ => Ok(curr),
+        let (result, scanned) = match (curr, chars.peek()) {
+            ('\r', Some('\n')) => {
+                chars.next();
+                (Ok('\n'), [Some('\r'), Some('\n')])
+            },
+            ('\r', _) =>
+                (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
+            (c, _) if mode.is_bytes() && c > '\x7F' =>
+                (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
+            (c, _) => (Ok(c), [Some(c), None]),
         };
-        callback(byte_offset..(byte_offset + curr.len_utf8()), result);
-        byte_offset += curr.len_utf8();
-    }
-}
-
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of characters or errors.
-/// NOTE: Raw strings do not perform any explicit character escaping, here we
-/// only translate CRLF to LF and produce errors on bare CR.
-pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    let mut byte_offset: usize = 0;
-
-    let mut chars = literal_text.chars().peekable();
-    while let Some(curr) = chars.next() {
-        let result = match (curr, chars.peek()) {
-            ('\r', Some('\n')) => Ok(curr),
-            ('\r', _) => Err(EscapeError::BareCarriageReturn),
-            (c, _) if c > '\x7F' => Err(EscapeError::NonAsciiCharInByteString),
-            _ => Ok(curr),
-        };
-        callback(byte_offset..(byte_offset + curr.len_utf8()), result);
-        byte_offset += curr.len_utf8();
+        let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
+        callback(byte_offset..(byte_offset + len_utf8), result);
+        byte_offset += len_utf8;
     }
 }