about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2013-10-07 23:01:39 -0700
committer bors <bors@rust-lang.org> 2013-10-07 23:01:39 -0700
commit c9196290af3934481bd413e11057725f248fd104 (patch)
tree 64e4d5fed93cfedb791ac3abddb9cc703d337f75 /src/libsyntax/parse
parent 6ddd011ce8875c6c5b119e5a8957a51d524a95da (diff)
parent d7dfe0ae34eb9a818dcbdb5646e21e721ffb3c33 (diff)
download rust-c9196290af3934481bd413e11057725f248fd104.tar.gz
rust-c9196290af3934481bd413e11057725f248fd104.zip
auto merge of #9674 : ben0x539/rust/raw-str, r=alexcrichton
This branch parses raw string literals as in #9411.
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/lexer.rs 68
-rw-r--r-- src/libsyntax/parse/parser.rs 45
-rw-r--r-- src/libsyntax/parse/token.rs 7
3 files changed, 97 insertions, 23 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 79c330c4737..a43e018cf49 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -213,10 +213,22 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
     (pos - rdr.filemap.start_pos)
 }
 
+/// Calls `f` with a string slice of the source text spanning from `start`
+/// up to but excluding `rdr.last_pos`, meaning the slice does not include
+/// the character `rdr.curr`.
 pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T {
+    with_str_from_to(rdr, start, rdr.last_pos, f)
+}
+
+/// Calls `f` with a string slice of the source text spanning from `start`
+/// up to but excluding `end`.
+fn with_str_from_to<T>(rdr: @mut StringReader,
+                       start: BytePos,
+                       end: BytePos,
+                       f: &fn(s: &str) -> T) -> T {
     f(rdr.src.slice(
             byte_offset(rdr, start).to_uint(),
-            byte_offset(rdr, rdr.last_pos).to_uint()))
+            byte_offset(rdr, end).to_uint()))
 }
 
 // EFFECT: advance the StringReader by one character. If a newline is
@@ -612,7 +624,10 @@ fn ident_continue(c: char) -> bool {
 // EFFECT: updates the interner
 fn next_token_inner(rdr: @mut StringReader) -> token::Token {
     let c = rdr.curr;
-    if ident_start(c) {
+    if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' {
+        // Note: r as in r" or r#" is part of a raw string literal,
+        // not an identifier, and is handled further down.
+
         let start = rdr.last_pos;
         while ident_continue(rdr.curr) {
             bump(rdr);
@@ -829,6 +844,47 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
         bump(rdr);
         return token::LIT_STR(str_to_ident(accum_str));
       }
+      'r' => {
+        let start_bpos = rdr.last_pos;
+        bump(rdr);
+        let mut hash_count = 0u;
+        while rdr.curr == '#' {
+            bump(rdr);
+            hash_count += 1;
+        }
+        if rdr.curr != '"' {
+            fatal_span_char(rdr, start_bpos, rdr.last_pos,
+                            ~"only `#` is allowed in raw string delimitation; \
+                              found illegal character",
+                            rdr.curr);
+        }
+        bump(rdr);
+        let content_start_bpos = rdr.last_pos;
+        let mut content_end_bpos;
+        'outer: loop {
+            if is_eof(rdr) {
+                fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"unterminated raw string");
+            }
+            if rdr.curr == '"' {
+                content_end_bpos = rdr.last_pos;
+                for _ in range(0, hash_count) {
+                    bump(rdr);
+                    if rdr.curr != '#' {
+                        continue 'outer;
+                    }
+                }
+                break;
+            }
+            bump(rdr);
+        }
+        bump(rdr);
+        let str_content = with_str_from_to(rdr,
+                                           content_start_bpos,
+                                           content_end_bpos,
+                                           str_to_ident);
+        return token::LIT_STR_RAW(str_content, hash_count);
+      }
       '-' => {
         if nextch(rdr) == '>' {
             bump(rdr);
@@ -987,6 +1043,14 @@ mod test {
         assert_eq!(tok, token::LIFETIME(id));
     }
 
+    #[test] fn raw_string() {
+        let env = setup(@"r###\"\"#a\\b\x00c\"\"###");
+        let TokenAndSpan {tok, sp: _} =
+            env.string_reader.next_token();
+        let id = token::str_to_ident("\"#a\\b\x00c\"");
+        assert_eq!(tok, token::LIT_STR_RAW(id, 3));
+    }
+
     #[test] fn line_doc_comments() {
         assert!(!is_line_non_doc_comment("///"));
         assert!(!is_line_non_doc_comment("/// blah"));
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index cad19543608..ffebe7980bf 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -48,6 +48,7 @@ use ast::{BiRem, required};
 use ast::{ret_style, return_val, BiShl, BiShr, Stmt, StmtDecl};
 use ast::{StmtExpr, StmtSemi, StmtMac, struct_def, struct_field};
 use ast::{struct_variant_kind, BiSub};
+use ast::StrStyle;
 use ast::{sty_box, sty_region, sty_static, sty_uniq, sty_value};
 use ast::{token_tree, trait_method, trait_ref, tt_delim, tt_seq, tt_tok};
 use ast::{tt_nonterminal, tuple_variant_kind, Ty, ty_, ty_bot, ty_box};
@@ -1282,7 +1283,8 @@ impl Parser {
             token::LIT_FLOAT(s, ft) => lit_float(self.id_to_str(s), ft),
             token::LIT_FLOAT_UNSUFFIXED(s) =>
                 lit_float_unsuffixed(self.id_to_str(s)),
-            token::LIT_STR(s) => lit_str(self.id_to_str(s)),
+            token::LIT_STR(s) => lit_str(self.id_to_str(s), ast::CookedStr),
+            token::LIT_STR_RAW(s, n) => lit_str(self.id_to_str(s), ast::RawStr(n)),
             token::LPAREN => { self.expect(&token::RPAREN); lit_nil },
             _ => { self.unexpected_last(tok); }
         }
@@ -2157,7 +2159,7 @@ impl Parser {
                 // HACK: turn &[...] into a &-evec
                 ex = match e.node {
                   ExprVec(*) | ExprLit(@codemap::Spanned {
-                    node: lit_str(_), span: _
+                    node: lit_str(*), span: _
                   })
                   if m == MutImmutable => {
                     ExprVstore(e, ExprVstoreSlice)
@@ -2181,7 +2183,7 @@ impl Parser {
               ExprVec(*) | ExprRepeat(*) if m == MutMutable =>
                 ExprVstore(e, ExprVstoreMutBox),
               ExprVec(*) |
-              ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) |
+              ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) |
               ExprRepeat(*) if m == MutImmutable => ExprVstore(e, ExprVstoreBox),
               _ => self.mk_unary(UnBox(m), e)
             };
@@ -2194,7 +2196,7 @@ impl Parser {
             // HACK: turn ~[...] into a ~-evec
             ex = match e.node {
               ExprVec(*) |
-              ExprLit(@codemap::Spanned { node: lit_str(_), span: _}) |
+              ExprLit(@codemap::Spanned { node: lit_str(*), span: _}) |
               ExprRepeat(*) => ExprVstore(e, ExprVstoreUniq),
               _ => self.mk_unary(UnUniq, e)
             };
@@ -2706,7 +2708,7 @@ impl Parser {
             pat = match sub.node {
               PatLit(e@@Expr {
                 node: ExprLit(@codemap::Spanned {
-                    node: lit_str(_),
+                    node: lit_str(*),
                     span: _}), _
               }) => {
                 let vst = @Expr {
@@ -2734,7 +2736,7 @@ impl Parser {
             pat = match sub.node {
               PatLit(e@@Expr {
                 node: ExprLit(@codemap::Spanned {
-                    node: lit_str(_),
+                    node: lit_str(*),
                     span: _}), _
               }) => {
                 let vst = @Expr {
@@ -2763,7 +2765,7 @@ impl Parser {
               pat = match sub.node {
                   PatLit(e@@Expr {
                       node: ExprLit(@codemap::Spanned {
-                            node: lit_str(_), span: _}), _
+                            node: lit_str(*), span: _}), _
                   }) => {
                       let vst = @Expr {
                           id: ast::DUMMY_NODE_ID,
@@ -4345,7 +4347,8 @@ impl Parser {
     // parse a string as an ABI spec on an extern type or module
     fn parse_opt_abis(&self) -> Option<AbiSet> {
         match *self.token {
-            token::LIT_STR(s) => {
+            token::LIT_STR(s)
+            | token::LIT_STR_RAW(s, _) => {
                 self.bump();
                 let the_string = ident_to_str(&s);
                 let mut abis = AbiSet::empty();
@@ -4371,15 +4374,15 @@ impl Parser {
                                      abi::all_names().connect(", "),
                                      word));
                         }
-                    }
-                }
+                     }
+                 }
                 Some(abis)
             }
 
             _ => {
                 None
-            }
-        }
+             }
+         }
     }
 
     // parse one of the items or view items allowed by the
@@ -4930,17 +4933,17 @@ impl Parser {
         }
     }
 
-    pub fn parse_optional_str(&self) -> Option<@str> {
-        match *self.token {
-            token::LIT_STR(s) => {
-                self.bump();
-                Some(ident_to_str(&s))
-            }
-            _ => None
-        }
+    pub fn parse_optional_str(&self) -> Option<(@str, ast::StrStyle)> {
+        let (s, style) = match *self.token {
+            token::LIT_STR(s) => (s, ast::CookedStr),
+            token::LIT_STR_RAW(s, n) => (s, ast::RawStr(n)),
+            _ => return None
+        };
+        self.bump();
+        Some((ident_to_str(&s), style))
     }
 
-    pub fn parse_str(&self) -> @str {
+    pub fn parse_str(&self) -> (@str, StrStyle) {
         match self.parse_optional_str() {
             Some(s) => { s }
             _ =>  self.fatal("expected string literal")
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index d0faf917688..ba4c2637d10 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -79,6 +79,7 @@ pub enum Token {
     LIT_FLOAT(ast::Ident, ast::float_ty),
     LIT_FLOAT_UNSUFFIXED(ast::Ident),
     LIT_STR(ast::Ident),
+    LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -194,6 +195,10 @@ pub fn to_str(input: @ident_interner, t: &Token) -> ~str {
         body
       }
       LIT_STR(ref s) => { format!("\"{}\"", ident_to_str(s).escape_default()) }
+      LIT_STR_RAW(ref s, n) => {
+          format!("r{delim}\"{string}\"{delim}",
+                  delim="#".repeat(n), string=ident_to_str(s))
+      }
 
       /* Name components */
       IDENT(s, _) => input.get(s.name).to_owned(),
@@ -243,6 +248,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_FLOAT(_, _) => true,
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
+      LIT_STR_RAW(_, _) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -284,6 +290,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_FLOAT(_, _) => true,
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
+      LIT_STR_RAW(_, _) => true,
       _ => false
     }
 }