about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs | 56
-rw-r--r-- src/libsyntax/parse/parser.rs | 1
-rw-r--r-- src/libsyntax/parse/token.rs | 7
3 files changed, 61 insertions, 3 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 59bcf059fcd..31f15fd7495 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -650,12 +650,13 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> token::Token {
         let c = self.curr;
-        if ident_start(c) && match (c.unwrap(), self.nextch()) {
+        if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) |
-           ('b', Some('"')) | ('b', Some('\'')) => false,
+           ('r', Some('"'), _) | ('r', Some('#'), _) |
+           ('b', Some('"'), _) | ('b', Some('\''), _) |
+           ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -863,6 +864,7 @@ impl<'a> StringReader<'a> {
             return match self.curr {
                 Some('\'') => parse_byte(self),
                 Some('"') => parse_byte_string(self),
+                Some('r') => parse_raw_byte_string(self),
                 _ => unreachable!()  // Should have been a token::IDENT above.
             };
 
@@ -978,6 +980,54 @@ impl<'a> StringReader<'a> {
                 self_.bump();
                 return token::LIT_BINARY(Rc::new(value));
             }
+
+            fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
+                let start_bpos = self_.last_pos;
+                self_.bump();
+                let mut hash_count = 0u;
+                while self_.curr_is('#') {
+                    self_.bump();
+                    hash_count += 1;
+                }
+
+                if self_.is_eof() {
+                    self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string");
+                } else if !self_.curr_is('"') {
+                    self_.fatal_span_char(start_bpos, self_.last_pos,
+                                    "only `#` is allowed in raw string delimitation; \
+                                     found illegal character",
+                                    self_.curr.unwrap());
+                }
+                self_.bump();
+                let content_start_bpos = self_.last_pos;
+                let mut content_end_bpos;
+                'outer: loop {
+                    match self_.curr {
+                        None => self_.fatal_span(start_bpos, self_.last_pos,
+                                                 "unterminated raw string"),
+                        Some('"') => {
+                            content_end_bpos = self_.last_pos;
+                            for _ in range(0, hash_count) {
+                                self_.bump();
+                                if !self_.curr_is('#') {
+                                    continue 'outer;
+                                }
+                            }
+                            break;
+                        },
+                        Some(c) => if c > '\x7F' {
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                                "raw byte string must be ASCII", c);
+                        }
+                    }
+                    self_.bump();
+                }
+                self_.bump();
+                let bytes = self_.with_str_from_to(content_start_bpos,
+                                                   content_end_bpos,
+                                                   |s| s.as_bytes().to_owned());
+                return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
+            }
           }
           '"' => {
             let mut accum_str = String::new();
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 826d28ef3ff..ae2ec216bee 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
             token::LIT_STR_RAW(s, n) => {
                 LitStr(self.id_to_interned_str(s), ast::RawStr(n))
             }
+            token::LIT_BINARY_RAW(ref v, _) |
             token::LIT_BINARY(ref v) => LitBinary(v.clone()),
             token::LPAREN => { self.expect(&token::RPAREN); LitNil },
             _ => { self.unexpected_last(tok); }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index b76dcaf0b94..a2af417ed79 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -88,6 +88,7 @@ pub enum Token {
     LIT_STR(ast::Ident),
     LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
     LIT_BINARY(Rc<Vec<u8>>),
+    LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -243,6 +244,10 @@ pub fn to_str(t: &Token) -> String {
             "b\"{}\"",
             v.iter().map(|&b| b as char).collect::<String>().escape_default())
       }
+      LIT_BINARY_RAW(ref s, n) => {
+        format!("br{delim}\"{string}\"{delim}",
+                 delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
+      }
 
       /* Name components */
       IDENT(s, _) => get_ident(s).get().to_string(),
@@ -298,6 +303,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -338,6 +344,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
       LIT_BINARY(_) => true,
+      LIT_BINARY_RAW(_, _) => true,
       _ => false
     }
 }