diff options
| author | Igor Matuszewski <Xanewok@gmail.com> | 2019-05-13 20:21:44 +0200 |
|---|---|---|
| committer | Igor Matuszewski <Xanewok@gmail.com> | 2019-06-08 22:58:53 +0200 |
| commit | 49d62e8d5a9df16e8ed6c703031fb72d264e3469 (patch) | |
| tree | 432febaebf84a451599d8ba3db0ab1cb8447694c | |
| parent | cab7e7fe76c3c881078f068a8da4a863efdd2c77 (diff) | |
| download | rust-49d62e8d5a9df16e8ed6c703031fb72d264e3469.tar.gz rust-49d62e8d5a9df16e8ed6c703031fb72d264e3469.zip | |
Prohibit bare CRs in raw byte strings
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 94 | ||||
| -rw-r--r-- | src/libsyntax/parse/unescape.rs | 24 | ||||
| -rw-r--r-- | src/libsyntax/parse/unescape_error_reporting.rs | 5 | ||||
| -rw-r--r-- | src/test/ui/parser/raw-byte-string-literals.rs | 3 | ||||
| -rw-r--r-- | src/test/ui/parser/raw-byte-string-literals.stderr | 14 |
5 files changed, 66 insertions, 74 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e3830b1e3b6..685c17d104b 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -292,15 +292,6 @@ impl<'a> StringReader<'a> { self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..]) } - /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an - /// escaped character to the error message - fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { - let mut m = m.to_string(); - m.push_str(": "); - push_escaped_char(&mut m, c); - self.err_span_(from_pos, to_pos, &m[..]); - } - /// Advance peek_token to refer to the next token, and /// possibly update the interner. fn advance_token(&mut self) -> Result<(), ()> { @@ -1070,7 +1061,13 @@ impl<'a> StringReader<'a> { self.validate_byte_str_escape(start_with_quote); (token::ByteStr, symbol) }, - Some('r') => self.scan_raw_byte_string(), + Some('r') => { + let (start, end, hash_count) = self.scan_raw_string(); + let symbol = self.name_from_to(start, end); + self.validate_raw_byte_str_escape(start, end); + + (token::ByteStrRaw(hash_count), symbol) + } _ => unreachable!(), // Should have been a token::Ident above. 
}; let suffix = self.scan_optional_raw_name(); @@ -1300,66 +1297,6 @@ impl<'a> StringReader<'a> { (content_start_bpos, content_end_bpos, hash_count) } - fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) { - let start_bpos = self.pos; - self.bump(); - let mut hash_count = 0; - while self.ch_is('#') { - if hash_count == 65535 { - let bpos = self.next_pos; - self.fatal_span_(start_bpos, - bpos, - "too many `#` symbols: raw strings may be \ - delimited by up to 65535 `#` symbols").raise(); - } - self.bump(); - hash_count += 1; - } - - if self.is_eof() { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } else if !self.ch_is('"') { - let pos = self.pos; - let ch = self.ch.unwrap(); - self.fatal_span_char(start_bpos, - pos, - "found invalid character; only `#` is allowed in raw \ - string delimitation", - ch).raise(); - } - self.bump(); - let content_start_bpos = self.pos; - let mut content_end_bpos; - 'outer: loop { - match self.ch { - None => { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } - Some('"') => { - content_end_bpos = self.pos; - for _ in 0..hash_count { - self.bump(); - if !self.ch_is('#') { - continue 'outer; - } - } - break; - } - Some(c) => { - if c > '\x7F' { - let pos = self.pos; - self.err_span_char(pos, pos, "raw byte string must be ASCII", c); - } - } - } - self.bump(); - } - - self.bump(); - - (token::ByteStrRaw(hash_count), self.name_from_to(content_start_bpos, content_end_bpos)) - } - fn validate_char_escape(&self, start_with_quote: BytePos) { self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { if let Err((off, err)) = unescape::unescape_char(lit) { @@ -1424,6 +1361,23 @@ impl<'a> StringReader<'a> { }); } + fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + self.with_str_from_to(content_start, content_end, |lit: &str| { + unescape::unescape_raw_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( 
+ &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) + }); + } + fn validate_byte_str_escape(&self, start_with_quote: BytePos) { self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { unescape::unescape_byte_str(lit, &mut |range, c| { diff --git a/src/libsyntax/parse/unescape.rs b/src/libsyntax/parse/unescape.rs index d6b7db16305..819463b5472 100644 --- a/src/libsyntax/parse/unescape.rs +++ b/src/libsyntax/parse/unescape.rs @@ -29,6 +29,7 @@ pub(crate) enum EscapeError { UnicodeEscapeInByte, NonAsciiCharInByte, + NonAsciiCharInByteString, } /// Takes a contents of a char literal (without quotes), and returns an @@ -88,6 +89,29 @@ where } } +/// Takes a contents of a string literal (without quotes) and produces a +/// sequence of characters or errors. +/// NOTE: Raw strings do not perform any explicit character escaping, here we +/// only translate CRLF to LF and produce errors on bare CR. 
+pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F) +where + F: FnMut(Range<usize>, Result<char, EscapeError>), +{ + let mut byte_offset: usize = 0; + + let mut chars = literal_text.chars().peekable(); + while let Some(curr) = chars.next() { + let result = match (curr, chars.peek()) { + ('\r', Some('\n')) => Ok(curr), + ('\r', _) => Err(EscapeError::BareCarriageReturn), + (c, _) if c > '\x7F' => Err(EscapeError::NonAsciiCharInByteString), + _ => Ok(curr), + }; + callback(byte_offset..(byte_offset + curr.len_utf8()), result); + byte_offset += curr.len_utf8(); + } +} + #[derive(Debug, Clone, Copy)] pub(crate) enum Mode { Char, diff --git a/src/libsyntax/parse/unescape_error_reporting.rs b/src/libsyntax/parse/unescape_error_reporting.rs index 22777c0884f..8f152974a6d 100644 --- a/src/libsyntax/parse/unescape_error_reporting.rs +++ b/src/libsyntax/parse/unescape_error_reporting.rs @@ -124,6 +124,11 @@ pub(crate) fn emit_unescape_error( handler.span_err(span, "byte constant must be ASCII. 
\ Use a \\xHH escape for a non-ASCII byte") } + EscapeError::NonAsciiCharInByteString => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "raw byte string must be ASCII") + } EscapeError::OutOfRangeHexEscape => { handler.span_err(span, "this form of character escape may only be used \ with characters in the range [\\x00-\\x7f]") diff --git a/src/test/ui/parser/raw-byte-string-literals.rs b/src/test/ui/parser/raw-byte-string-literals.rs index 3b50fb8036a..87ecfb5c544 100644 --- a/src/test/ui/parser/raw-byte-string-literals.rs +++ b/src/test/ui/parser/raw-byte-string-literals.rs @@ -1,4 +1,7 @@ +// ignore-tidy-cr +// compile-flags: -Z continue-parse-after-error pub fn main() { + br"a "; //~ ERROR bare CR not allowed in string br"é"; //~ ERROR raw byte string must be ASCII br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/src/test/ui/parser/raw-byte-string-literals.stderr b/src/test/ui/parser/raw-byte-string-literals.stderr index 671ed97d1b5..03fe79722b8 100644 --- a/src/test/ui/parser/raw-byte-string-literals.stderr +++ b/src/test/ui/parser/raw-byte-string-literals.stderr @@ -1,14 +1,20 @@ -error: raw byte string must be ASCII: \u{e9} - --> $DIR/raw-byte-string-literals.rs:2:8 +error: bare CR not allowed in string, use \r instead + --> $DIR/raw-byte-string-literals.rs:4:9 + | +LL | br"a "; + | ^ + +error: raw byte string must be ASCII + --> $DIR/raw-byte-string-literals.rs:5:8 | LL | br"é"; | ^ error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:3:6 + --> $DIR/raw-byte-string-literals.rs:6:6 | LL | br##~"a"~##; | ^^^ -error: aborting due to 2 previous errors +error: aborting due to 3 previous errors |
