diff options
| author | Igor Matuszewski <Xanewok@gmail.com> | 2019-06-09 00:33:21 +0200 |
|---|---|---|
| committer | Igor Matuszewski <Xanewok@gmail.com> | 2019-06-09 00:33:21 +0200 |
| commit | d4632744fa0fb18a7c3f5058f1e8157c760353b4 (patch) | |
| tree | 82e61672f38cc072d6c4454c38e2746b6eae95f1 /src/libsyntax/parse | |
| parent | 49d62e8d5a9df16e8ed6c703031fb72d264e3469 (diff) | |
| download | rust-d4632744fa0fb18a7c3f5058f1e8157c760353b4.tar.gz rust-d4632744fa0fb18a7c3f5058f1e8157c760353b4.zip | |
Translate CRLF -> LF in raw (byte) strings
Diffstat (limited to 'src/libsyntax/parse')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 4 |
| -rw-r--r-- | src/libsyntax/parse/literal.rs | 4 |
| -rw-r--r-- | src/libsyntax/parse/unescape.rs | 44 |
3 files changed, 18 insertions, 34 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 685c17d104b..71fa4bdb2cf 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1346,7 +1346,7 @@ impl<'a> StringReader<'a> { fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_str(lit, &mut |range, c| { + unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| { if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, @@ -1363,7 +1363,7 @@ impl<'a> StringReader<'a> { fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_byte_str(lit, &mut |range, c| { + unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| { if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 3a2d905585c..3711512d64e 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -4,7 +4,7 @@ use crate::ast::{self, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; use crate::parse::token::{self, Token, TokenKind}; -use crate::parse::unescape::{unescape_str, unescape_byte_str, unescape_raw_str}; +use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str}; use crate::parse::unescape::{unescape_char, unescape_byte}; use crate::print::pprust; use crate::symbol::{kw, sym, Symbol}; @@ -144,7 +144,7 @@ impl LitKind { let symbol = if s.contains('\r') { let mut buf = String::with_capacity(s.len()); let mut error = Ok(()); - unescape_raw_str(&s, &mut |_, unescaped_char| { + unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), Err(_) => error = Err(LitError::LexerError), diff 
--git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs index 819463b5472..f5b6c38083e 100644 --- a/src/libsyntax/parse/unescape.rs +++ b/src/libsyntax/parse/unescape.rs @@ -71,7 +71,7 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F) +pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F) where F: FnMut(Range<usize>, Result<char, EscapeError>), { @@ -79,36 +79,20 @@ where let mut chars = literal_text.chars().peekable(); while let Some(curr) = chars.next() { - let result = match (curr, chars.peek()) { - ('\r', Some('\n')) => Ok(curr), - ('\r', _) => Err(EscapeError::BareCarriageReturn), - _ => Ok(curr), + let (result, scanned) = match (curr, chars.peek()) { + ('\r', Some('\n')) => { + chars.next(); + (Ok('\n'), [Some('\r'), Some('\n')]) + }, + ('\r', _) => + (Err(EscapeError::BareCarriageReturn), [Some('\r'), None]), + (c, _) if mode.is_bytes() && c > '\x7F' => + (Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]), + (c, _) => (Ok(c), [Some(c), None]), }; - callback(byte_offset..(byte_offset + curr.len_utf8()), result); - byte_offset += curr.len_utf8(); - } -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. 
-pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F) -where - F: FnMut(Range<usize>, Result<char, EscapeError>), -{ - let mut byte_offset: usize = 0; - - let mut chars = literal_text.chars().peekable(); - while let Some(curr) = chars.next() { - let result = match (curr, chars.peek()) { - ('\r', Some('\n')) => Ok(curr), - ('\r', _) => Err(EscapeError::BareCarriageReturn), - (c, _) if c > '\x7F' => Err(EscapeError::NonAsciiCharInByteString), - _ => Ok(curr), - }; - callback(byte_offset..(byte_offset + curr.len_utf8()), result); - byte_offset += curr.len_utf8(); + let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum(); + callback(byte_offset..(byte_offset + len_utf8), result); + byte_offset += len_utf8; } } |
