diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2014-06-13 18:56:24 +0100 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2014-06-17 23:43:18 +0200 |
| commit | b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4 (patch) | |
| tree | 9a68d3b4eae31521d410062ca5ff9fc7018dc233 /src/libsyntax | |
| parent | d7e01b5809cd600a30bab29da698acb3d1b52409 (diff) | |
| download | rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.tar.gz rust-b8a4c1415b154fa1e5bd8bb54e681f0f5e21e2a4.zip | |
Add br##"xx"## raw byte string literals.
Diffstat (limited to 'src/libsyntax')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 56 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 1 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 7 |
3 files changed, 61 insertions, 3 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 59bcf059fcd..31f15fd7495 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -650,12 +650,13 @@ impl<'a> StringReader<'a> { /// token, and updates the interner fn next_token_inner(&mut self) -> token::Token { let c = self.curr; - if ident_start(c) && match (c.unwrap(), self.nextch()) { + if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) { // Note: r as in r" or r#" is part of a raw string literal, // b as in b' is part of a byte literal. // They are not identifiers, and are handled further down. - ('r', Some('"')) | ('r', Some('#')) | - ('b', Some('"')) | ('b', Some('\'')) => false, + ('r', Some('"'), _) | ('r', Some('#'), _) | + ('b', Some('"'), _) | ('b', Some('\''), _) | + ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false, _ => true } { let start = self.last_pos; @@ -863,6 +864,7 @@ impl<'a> StringReader<'a> { return match self.curr { Some('\'') => parse_byte(self), Some('"') => parse_byte_string(self), + Some('r') => parse_raw_byte_string(self), _ => unreachable!() // Should have been a token::IDENT above. }; @@ -978,6 +980,54 @@ impl<'a> StringReader<'a> { self_.bump(); return token::LIT_BINARY(Rc::new(value)); } + + fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token { + let start_bpos = self_.last_pos; + self_.bump(); + let mut hash_count = 0u; + while self_.curr_is('#') { + self_.bump(); + hash_count += 1; + } + + if self_.is_eof() { + self_.fatal_span(start_bpos, self_.last_pos, "unterminated raw string"); + } else if !self_.curr_is('"') { + self_.fatal_span_char(start_bpos, self_.last_pos, + "only `#` is allowed in raw string delimitation; \ + found illegal character", + self_.curr.unwrap()); + } + self_.bump(); + let content_start_bpos = self_.last_pos; + let mut content_end_bpos; + 'outer: loop { + match self_.curr { + None => self_.fatal_span(start_bpos, self_.last_pos, + "unterminated raw string"), + Some('"') => { + content_end_bpos = self_.last_pos; + for _ in range(0, hash_count) { + self_.bump(); + if !self_.curr_is('#') { + continue 'outer; + } + } + break; + }, + Some(c) => if c > '\x7F' { + self_.err_span_char(self_.last_pos, self_.last_pos, + "raw byte string must be ASCII", c); + } + } + self_.bump(); + } + self_.bump(); + let bytes = self_.with_str_from_to(content_start_bpos, + content_end_bpos, + |s| s.as_bytes().to_owned()); + return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); + } } '"' => { let mut accum_str = String::new(); diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 826d28ef3ff..ae2ec216bee 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> { token::LIT_STR_RAW(s, n) => { LitStr(self.id_to_interned_str(s), ast::RawStr(n)) } + token::LIT_BINARY_RAW(ref v, _) | token::LIT_BINARY(ref v) => LitBinary(v.clone()), token::LPAREN => { self.expect(&token::RPAREN); LitNil }, _ => { self.unexpected_last(tok); } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index b76dcaf0b94..a2af417ed79 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -88,6 +88,7 @@ pub enum Token { LIT_STR(ast::Ident), LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ LIT_BINARY(Rc<Vec<u8>>), + LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ // an identifier contains an "is_mod_name" boolean, @@ -243,6 +244,10 @@ pub fn to_str(t: &Token) -> String { "b\"{}\"", v.iter().map(|&b| b as char).collect::<String>().escape_default()) } + LIT_BINARY_RAW(ref s, n) => { + format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii()) + } /* Name components */ IDENT(s, _) => get_ident(s).get().to_string(), @@ -298,6 +303,7 @@ pub fn can_begin_expr(t: &Token) -> bool { LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, + LIT_BINARY_RAW(_, _) => true, POUND => true, AT => true, NOT => true, @@ -338,6 +344,7 @@ pub fn is_lit(t: &Token) -> bool { LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, + LIT_BINARY_RAW(_, _) => true, _ => false } } |
