diff options
| author | bors <bors@rust-lang.org> | 2013-10-07 23:01:39 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-10-07 23:01:39 -0700 |
| commit | c9196290af3934481bd413e11057725f248fd104 (patch) | |
| tree | 64e4d5fed93cfedb791ac3abddb9cc703d337f75 /src/libsyntax/parse | |
| parent | 6ddd011ce8875c6c5b119e5a8957a51d524a95da (diff) | |
| parent | d7dfe0ae34eb9a818dcbdb5646e21e721ffb3c33 (diff) | |
| download | rust-c9196290af3934481bd413e11057725f248fd104.tar.gz rust-c9196290af3934481bd413e11057725f248fd104.zip | |
auto merge of #9674 : ben0x539/rust/raw-str, r=alexcrichton
This branch parses raw string literals as in #9411.
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 68 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 45 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 7 |
3 files changed, 97 insertions, 23 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 79c330c4737..a43e018cf49 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -213,10 +213,22 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos { (pos - rdr.filemap.start_pos) } +/// Calls `f` with a string slice of the source text spanning from `start` +/// up to but excluding `rdr.last_pos`, meaning the slice does not include +/// the character `rdr.curr`. pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T { + with_str_from_to(rdr, start, rdr.last_pos, f) +} + +/// Calls `f` with a string slice of the source text spanning from `start` +/// up to but excluding `end`. +fn with_str_from_to<T>(rdr: @mut StringReader, + start: BytePos, + end: BytePos, + f: &fn(s: &str) -> T) -> T { f(rdr.src.slice( byte_offset(rdr, start).to_uint(), - byte_offset(rdr, rdr.last_pos).to_uint())) + byte_offset(rdr, end).to_uint())) } // EFFECT: advance the StringReader by one character. If a newline is @@ -612,7 +624,10 @@ fn ident_continue(c: char) -> bool { // EFFECT: updates the interner fn next_token_inner(rdr: @mut StringReader) -> token::Token { let c = rdr.curr; - if ident_start(c) { + if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' { + // Note: r as in r" or r#" is part of a raw string literal, + // not an identifier, and is handled further down. 
+ let start = rdr.last_pos; while ident_continue(rdr.curr) { bump(rdr); @@ -829,6 +844,47 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { bump(rdr); return token::LIT_STR(str_to_ident(accum_str)); } + 'r' => { + let start_bpos = rdr.last_pos; + bump(rdr); + let mut hash_count = 0u; + while rdr.curr == '#' { + bump(rdr); + hash_count += 1; + } + if rdr.curr != '"' { + fatal_span_char(rdr, start_bpos, rdr.last_pos, + ~"only `#` is allowed in raw string delimitation; \ + found illegal character", + rdr.curr); + } + bump(rdr); + let content_start_bpos = rdr.last_pos; + let mut content_end_bpos; + 'outer: loop { + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated raw string"); + } + if rdr.curr == '"' { + content_end_bpos = rdr.last_pos; + for _ in range(0, hash_count) { + bump(rdr); + if rdr.curr != '#' { + continue 'outer; + } + } + break; + } + bump(rdr); + } + bump(rdr); + let str_content = with_str_from_to(rdr, + content_start_bpos, + content_end_bpos, + str_to_ident); + return token::LIT_STR_RAW(str_content, hash_count); + } '-' => { if nextch(rdr) == '>' { bump(rdr); @@ -987,6 +1043,14 @@ mod test { assert_eq!(tok, token::LIFETIME(id)); } + #[test] fn raw_string() { + let env = setup(@"r###\"\"#a\\b\x00c\"\"###"); + let TokenAndSpan {tok, sp: _} = + env.string_reader.next_token(); + let id = token::str_to_ident("\"#a\\b\x00c\""); + assert_eq!(tok, token::LIT_STR_RAW(id, 3)); + } + #[test] fn line_doc_comments() { assert!(!is_line_non_doc_comment("///")); assert!(!is_line_non_doc_comment("/// blah")); diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index cad19543608..ffebe7980bf 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -48,6 +48,7 @@ use ast::{BiRem, required}; use ast::{ret_style, return_val, BiShl, BiShr, Stmt, StmtDecl}; use ast::{StmtExpr, StmtSemi, StmtMac, struct_def, struct_field}; use ast::{struct_variant_kind, BiSub}; +use ast::StrStyle; use 
ast::{sty_box, sty_region, sty_static, sty_uniq, sty_value}; use ast::{token_tree, trait_method, trait_ref, tt_delim, tt_seq, tt_tok}; use ast::{tt_nonterminal, tuple_variant_kind, Ty, ty_, ty_bot, ty_box}; @@ -1282,7 +1283,8 @@ impl Parser { token::LIT_FLOAT(s, ft) => lit_float(self.id_to_str(s), ft), token::LIT_FLOAT_UNSUFFIXED(s) => lit_float_unsuffixed(self.id_to_str(s)), - token::LIT_STR(s) => lit_str(self.id_to_str(s)), + token::LIT_STR(s) => lit_str(self.id_to_str(s), ast::CookedStr), + token::LIT_STR_RAW(s, n) => lit_str(self.id_to_str(s), ast::RawStr(n)), token::LPAREN => { self.expect(&token::RPAREN); lit_nil }, _ => { self.unexpected_last(tok); } } @@ -2157,7 +2159,7 @@ impl Parser { // HACK: turn &[...] into a &-evec ex = match e.node { ExprVec(*) | ExprLit(@codemap::Spanned { - node: lit_str(_), span: _ + node: lit_str(*), span: _ }) if m == MutImmutable => { ExprVstore(e, ExprVstoreSlice) @@ -2181,7 +2183,7 @@ impl Parser { ExprVec(*) | ExprRepeat(*) if m == MutMutable => ExprVstore(e, ExprVstoreMutBox), ExprVec(*) | - ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) | + ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) | ExprRepeat(*) if m == MutImmutable => ExprVstore(e, ExprVstoreBox), _ => self.mk_unary(UnBox(m), e) }; @@ -2194,7 +2196,7 @@ impl Parser { // HACK: turn ~[...] 
into a ~-evec ex = match e.node { ExprVec(*) | - ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) | + ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) | ExprRepeat(*) => ExprVstore(e, ExprVstoreUniq), _ => self.mk_unary(UnUniq, e) }; @@ -2706,7 +2708,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { @@ -2734,7 +2736,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { @@ -2763,7 +2765,7 @@ impl Parser { pat = match sub.node { PatLit(e@@Expr { node: ExprLit(@codemap::Spanned { - node: lit_str(_), span: _}), _ + node: lit_str(*), span: _}), _ }) => { let vst = @Expr { id: ast::DUMMY_NODE_ID, @@ -4345,7 +4347,8 @@ impl Parser { // parse a string as an ABI spec on an extern type or module fn parse_opt_abis(&self) -> Option<AbiSet> { match *self.token { - token::LIT_STR(s) => { + token::LIT_STR(s) + | token::LIT_STR_RAW(s, _) => { self.bump(); let the_string = ident_to_str(&s); let mut abis = AbiSet::empty(); @@ -4371,15 +4374,15 @@ impl Parser { abi::all_names().connect(", "), word)); } - } - } + } + } Some(abis) } _ => { None - } - } + } + } } // parse one of the items or view items allowed by the @@ -4930,17 +4933,17 @@ impl Parser { } } - pub fn parse_optional_str(&self) -> Option<@str> { - match *self.token { - token::LIT_STR(s) => { - self.bump(); - Some(ident_to_str(&s)) - } - _ => None - } + pub fn parse_optional_str(&self) -> Option<(@str, ast::StrStyle)> { + let (s, style) = match *self.token { + token::LIT_STR(s) => (s, ast::CookedStr), + token::LIT_STR_RAW(s, n) => (s, ast::RawStr(n)), + _ => return None + }; + self.bump(); + Some((ident_to_str(&s), style)) } - pub fn parse_str(&self) -> @str { + pub fn parse_str(&self) -> (@str, StrStyle) { match self.parse_optional_str() { Some(s) => { s } _ => 
self.fatal("expected string literal") diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index d0faf917688..ba4c2637d10 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -79,6 +79,7 @@ pub enum Token { LIT_FLOAT(ast::Ident, ast::float_ty), LIT_FLOAT_UNSUFFIXED(ast::Ident), LIT_STR(ast::Ident), + LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ /* Name components */ // an identifier contains an "is_mod_name" boolean, @@ -194,6 +195,10 @@ pub fn to_str(input: @ident_interner, t: &Token) -> ~str { body } LIT_STR(ref s) => { format!("\"{}\"", ident_to_str(s).escape_default()) } + LIT_STR_RAW(ref s, n) => { + format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n), string=ident_to_str(s)) + } /* Name components */ IDENT(s, _) => input.get(s.name).to_owned(), @@ -243,6 +248,7 @@ pub fn can_begin_expr(t: &Token) -> bool { LIT_FLOAT(_, _) => true, LIT_FLOAT_UNSUFFIXED(_) => true, LIT_STR(_) => true, + LIT_STR_RAW(_, _) => true, POUND => true, AT => true, NOT => true, @@ -284,6 +290,7 @@ pub fn is_lit(t: &Token) -> bool { LIT_FLOAT(_, _) => true, LIT_FLOAT_UNSUFFIXED(_) => true, LIT_STR(_) => true, + LIT_STR_RAW(_, _) => true, _ => false } } |
