Add br##"xx"## raw byte string literals.

author: Simon Sapin <simon.sapin@exyr.org> 2014-06-13 18:56:24 +0100
committer: Simon Sapin <simon.sapin@exyr.org> 2014-06-17 23:43:18 +0200
commit: b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4 (patch)
tree: 9a68d3b4eae31521d410062ca5ff9fc7018dc233 /src
parent: d7e01b5809cd600a30bab29da698acb3d1b52409 (diff)
download: rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.tar.gz rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.zip
7 files changed, 102 insertions, 5 deletions
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 172a1be7b4e..daa9ee3da84 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_BYTE(..) | t::LIT_BINARY(..) |
+            t::LIT_BYTE(..) | t::LIT_BINARY(..) | t::LIT_BINARY_RAW(..) |
                 t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 59bcf059fcd..31f15fd7495 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -650,12 +650,13 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> token::Token {
         let c = self.curr;
-        if ident_start(c) && match (c.unwrap(), self.nextch()) {
+        if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) |
-           ('b', Some('"')) | ('b', Some('\'')) => false,
+           ('r', Some('"'), _) | ('r', Some('#'), _) |
+           ('b', Some('"'), _) | ('b', Some('\''), _) |
+           ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -863,6 +864,7 @@ impl<'a> StringReader<'a> {
             return match self.curr {
                 Some('\'') => parse_byte(self),
                 Some('"') => parse_byte_string(self),
+                Some('r') => parse_raw_byte_string(self),
                 _ => unreachable!()  // Should have been a token::IDENT above.
             };
 
@@ -978,6 +980,54 @@ impl<'a> StringReader<'a> {
                 self_.bump();
                 return token::LIT_BINARY(Rc::new(value));
             }
+
+            fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
+                let start_bpos = self_.last_pos;
+                self_.bump();
+                let mut hash_count = 0u;
+                while self_.curr_is('#') {
+                    self_.bump();
+                    hash_count += 1;
+                }
+
+                if self_.is_eof() {
+                    self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string");
+                } else if !self_.curr_is('"') {
+                    self_.fatal_span_char(start_bpos, self_.last_pos,
+                                    "only `#` is allowed in raw string delimitation; \
+                                     found illegal character",
+                                    self_.curr.unwrap());
+                }
+                self_.bump();
+                let content_start_bpos = self_.last_pos;
+                let mut content_end_bpos;
+                'outer: loop {
+                    match self_.curr {
+                        None => self_.fatal_span(start_bpos, self_.last_pos,
+                                                 "unterminated raw string"),
+                        Some('"') => {
+                            content_end_bpos = self_.last_pos;
+                            for _ in range(0, hash_count) {
+                                self_.bump();
+                                if !self_.curr_is('#') {
+                                    continue 'outer;
+                                }
+                            }
+                            break;
+                        },
+                        Some(c) => if c > '\x7F' {
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                                "raw byte string must be ASCII", c);
+                        }
+                    }
+                    self_.bump();
+                }
+                self_.bump();
+                let bytes = self_.with_str_from_to(content_start_bpos,
+                                                   content_end_bpos,
+                                                   |s| s.as_bytes().to_owned());
+                return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
+            }
           }
           '"' => {
             let mut accum_str = String::new();
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 826d28ef3ff..ae2ec216bee 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
             token::LIT_STR_RAW(s, n) => {
                 LitStr(self.id_to_interned_str(s), ast::RawStr(n))
             }
+            token::LIT_BINARY_RAW(ref v, _) |
             token::LIT_BINARY(ref v) => LitBinary(v.clone()),
             token::LPAREN => { self.expect(&token::RPAREN); LitNil },
             _ => { self.unexpected_last(tok); }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index b76dcaf0b94..a2af417ed79 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -88,6 +88,7 @@ pub enum Token {
     LIT_STR(ast::Ident),
     LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
     LIT_BINARY(Rc<Vec<u8>>),
+    LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -243,6 +244,10 @@ pub fn to_str(t: &Token) -> String {
             "b\"{}\"",
             v.iter().map(|&b| b as char).collect::<String>().escape_default())
       }
+      LIT_BINARY_RAW(ref s, n) => {
+        format!("br{delim}\"{string}\"{delim}",
+                 delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
+      }
 
       /* Name components */
       IDENT(s, _) => get_ident(s).get().to_string(),
@@ -298,6 +303,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -338,6 +344,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       _ => false
     }
 }
diff --git a/src/test/compile-fail/raw-byte-string-eof.rs b/src/test/compile-fail/raw-byte-string-eof.rs
new file mode 100644
index 00000000000..83ea9db39b7
--- /dev/null
+++ b/src/test/compile-fail/raw-byte-string-eof.rs
@@ -0,0 +1,16 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+pub fn main() {
+    br##"a"#;  //~ unterminated raw string
+}
+
+
diff --git a/src/test/compile-fail/raw-byte-string-literals.rs b/src/test/compile-fail/raw-byte-string-literals.rs
new file mode 100644
index 00000000000..7a3d1b2318a
--- /dev/null
+++ b/src/test/compile-fail/raw-byte-string-literals.rs
@@ -0,0 +1,17 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+pub fn main() {
+    br"é";  //~ raw byte string must be ASCII
+    br##~"a"~##;  //~ only `#` is allowed in raw string delimitation
+}
+
+
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
index 58df7dc8efd..5317fdc391f 100644
--- a/src/test/run-pass/byte-literals.rs
+++ b/src/test/run-pass/byte-literals.rs
@@ -11,6 +11,7 @@
 
 static FOO: u8 = b'\xF0';
 static BAR: &'static [u8] = b"a\xF0\t";
+static BAZ: &'static [u8] = br"a\n";
 
 pub fn main() {
     assert_eq!(b'a', 97u8);
@@ -24,7 +25,6 @@ pub fn main() {
     assert_eq!(b'\xF0', 240u8);
     assert_eq!(FOO, 240u8);
 
-    // FIXME: Do we want this to be valid?
     assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]);
 
     match 42 {
@@ -47,4 +47,10 @@ pub fn main() {
         b"a\n" => {},
         _ => fail!(),
     }
+
+    assert_eq!(BAZ, &[97u8, 92u8, 110u8]);
+    assert_eq!(br"a\n", &[97u8, 92u8, 110u8]);
+    assert_eq!(br"a\n", b"a\\n");
+    assert_eq!(br###"a"##b"###, &[97u8, 34u8, 35u8, 35u8, 98u8]);
+    assert_eq!(br###"a"##b"###, b"a\"##b");
 }
author	Simon Sapin <simon.sapin@exyr.org>	2014-06-13 18:56:24 +0100
committer	Simon Sapin <simon.sapin@exyr.org>	2014-06-17 23:43:18 +0200
commit	b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4 (patch)
tree	9a68d3b4eae31521d410062ca5ff9fc7018dc233 /src
parent	d7e01b5809cd600a30bab29da698acb3d1b52409 (diff)
download	rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.tar.gz rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.zip