diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2014-06-13 18:56:24 +0100 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2014-06-17 23:43:18 +0200 |
| commit | b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4 (patch) | |
| tree | 9a68d3b4eae31521d410062ca5ff9fc7018dc233 /src | |
| parent | d7e01b5809cd600a30bab29da698acb3d1b52409 (diff) | |
| download | rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.tar.gz rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.zip | |
Add br##"xx"## raw byte string literals.
Diffstat (limited to 'src')
| -rw-r--r-- | src/librustdoc/html/highlight.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 56 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 1 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 7 | ||||
| -rw-r--r-- | src/test/compile-fail/raw-byte-string-eof.rs | 16 | ||||
| -rw-r--r-- | src/test/compile-fail/raw-byte-string-literals.rs | 17 | ||||
| -rw-r--r-- | src/test/run-pass/byte-literals.rs | 8 |
7 files changed, 102 insertions, 5 deletions
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 172a1be7b4e..daa9ee3da84 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -140,7 +140,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, } // text literals - t::LIT_BYTE(..) | t::LIT_BINARY(..) | + t::LIT_BYTE(..) | t::LIT_BINARY(..) | t::LIT_BINARY_RAW(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string", // number literals diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 59bcf059fcd..31f15fd7495 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -650,12 +650,13 @@ impl<'a> StringReader<'a> { /// token, and updates the interner fn next_token_inner(&mut self) -> token::Token { let c = self.curr; - if ident_start(c) && match (c.unwrap(), self.nextch()) { + if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) { // Note: r as in r" or r#" is part of a raw string literal, // b as in b' is part of a byte literal. // They are not identifiers, and are handled further down. - ('r', Some('"')) | ('r', Some('#')) | - ('b', Some('"')) | ('b', Some('\'')) => false, + ('r', Some('"'), _) | ('r', Some('#'), _) | + ('b', Some('"'), _) | ('b', Some('\''), _) | + ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false, _ => true } { let start = self.last_pos; @@ -863,6 +864,7 @@ impl<'a> StringReader<'a> { return match self.curr { Some('\'') => parse_byte(self), Some('"') => parse_byte_string(self), + Some('r') => parse_raw_byte_string(self), _ => unreachable!() // Should have been a token::IDENT above. }; @@ -978,6 +980,54 @@ impl<'a> StringReader<'a> { self_.bump(); return token::LIT_BINARY(Rc::new(value)); } + + fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token { + let start_bpos = self_.last_pos; + self_.bump(); + let mut hash_count = 0u; + while self_.curr_is('#') { + self_.bump(); + hash_count += 1; + } + + if self_.is_eof() { + self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string"); + } else if !self_.curr_is('"') { + self_.fatal_span_char(start_bpos, self_.last_pos, + "only `#` is allowed in raw string delimitation; \ + found illegal character", + self_.curr.unwrap()); + } + self_.bump(); + let content_start_bpos = self_.last_pos; + let mut content_end_bpos; + 'outer: loop { + match self_.curr { + None => self_.fatal_span(start_bpos, self_.last_pos, + "unterminated raw string"), + Some('"') => { + content_end_bpos = self_.last_pos; + for _ in range(0, hash_count) { + self_.bump(); + if !self_.curr_is('#') { + continue 'outer; + } + } + break; + }, + Some(c) => if c > '\x7F' { + self_.err_span_char(self_.last_pos, self_.last_pos, + "raw byte string must be ASCII", c); + } + } + self_.bump(); + } + self_.bump(); + let bytes = self_.with_str_from_to(content_start_bpos, + content_end_bpos, + |s| s.as_bytes().to_owned()); + return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); + } } '"' => { let mut accum_str = String::new(); diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 826d28ef3ff..ae2ec216bee 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> { token::LIT_STR_RAW(s, n) => { LitStr(self.id_to_interned_str(s), ast::RawStr(n)) } + token::LIT_BINARY_RAW(ref v, _) | token::LIT_BINARY(ref v) => LitBinary(v.clone()), token::LPAREN => { self.expect(&token::RPAREN); LitNil }, _ => { self.unexpected_last(tok); } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index b76dcaf0b94..a2af417ed79 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -88,6 +88,7 @@ pub enum Token { LIT_STR(ast::Ident), LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ LIT_BINARY(Rc<Vec<u8>>), + LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ // an identifier contains an "is_mod_name" boolean, @@ -243,6 +244,10 @@ pub fn to_str(t: &Token) -> String { "b\"{}\"", v.iter().map(|&b| b as char).collect::<String>().escape_default()) } + LIT_BINARY_RAW(ref s, n) => { + format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii()) + } /* Name components */ IDENT(s, _) => get_ident(s).get().to_string(), @@ -298,6 +303,7 @@ pub fn can_begin_expr(t: &Token) -> bool { LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, + LIT_BINARY_RAW(_, _) => true, POUND => true, AT => true, NOT => true, @@ -338,6 +344,7 @@ pub fn is_lit(t: &Token) -> bool { LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, + LIT_BINARY_RAW(_, _) => true, _ => false } } diff --git a/src/test/compile-fail/raw-byte-string-eof.rs b/src/test/compile-fail/raw-byte-string-eof.rs new file mode 100644 index 00000000000..83ea9db39b7 --- /dev/null +++ b/src/test/compile-fail/raw-byte-string-eof.rs @@ -0,0 +1,16 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +pub fn main() { + br##"a"#; //~ unterminated raw string +} + + diff --git a/src/test/compile-fail/raw-byte-string-literals.rs b/src/test/compile-fail/raw-byte-string-literals.rs new file mode 100644 index 00000000000..7a3d1b2318a --- /dev/null +++ b/src/test/compile-fail/raw-byte-string-literals.rs @@ -0,0 +1,17 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +pub fn main() { + br"é"; //~ raw byte string must be ASCII + br##~"a"~##; //~ only `#` is allowed in raw string delimitation +} + + diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs index 58df7dc8efd..5317fdc391f 100644 --- a/src/test/run-pass/byte-literals.rs +++ b/src/test/run-pass/byte-literals.rs @@ -11,6 +11,7 @@ static FOO: u8 = b'\xF0'; static BAR: &'static [u8] = b"a\xF0\t"; +static BAZ: &'static [u8] = br"a\n"; pub fn main() { assert_eq!(b'a', 97u8); @@ -24,7 +25,6 @@ pub fn main() { assert_eq!(b'\xF0', 240u8); assert_eq!(FOO, 240u8); - // FIXME: Do we want this to be valid? assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]); match 42 { @@ -47,4 +47,10 @@ pub fn main() { b"a\n" => {}, _ => fail!(), } + + assert_eq!(BAZ, &[97u8, 92u8, 110u8]); + assert_eq!(br"a\n", &[97u8, 92u8, 110u8]); + assert_eq!(br"a\n", b"a\\n"); + assert_eq!(br###"a"##b"###, &[97u8, 34u8, 35u8, 35u8, 98u8]); + assert_eq!(br###"a"##b"###, b"a\"##b"); } |
