quote_expr macro: embed Ident using special encoding that preserves hygiene.

This adds support to `quote_expr!` and friends for round-trip hygienic preservation of Ident. Here are the pieces of the puzzle: * Adding a method for encoding Ident for re-reading into a token tree. * Support for reading such encoded Idents in the lexer. Note that one must peek ahead for MOD_SEP after scan_embedded_hygienic_ident. * To ensure that encoded Idents are only read when we are in the midst of expanding a `quote_expr` or similar, added a `read_embedded_ident` flag on `StringReader`. * pprust support for encoding Idents as (uint,uint) pairs (for hygiene).
author: Felix S. Klock II <pnkfelix@pnkfx.org> 2014-08-01 17:11:53 +0200
committer: Felix S. Klock II <pnkfelix@pnkfx.org> 2014-08-13 17:40:15 +0200
commit: c3ce245ba68f62edfc5818f003b2b78a02ce5c03 (patch)
tree: 0be1530cc0bd24d38a1f8f9d80d196e75be26a60 /src/libsyntax/parse
parent: 9d554212de0398ac044e6d815da3bfb184831e77 (diff)
download: rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.tar.gz
rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.zip
2 files changed, 174 insertions, 0 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 625c03ec13d..e5275af5cca 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -17,7 +17,9 @@ use parse::token;
 use parse::token::{str_to_ident};
 
 use std::char;
+use std::fmt;
 use std::mem::replace;
+use std::num;
 use std::rc::Rc;
 use std::str;
 
@@ -55,6 +57,11 @@ pub struct StringReader<'a> {
     /* cached: */
     pub peek_tok: token::Token,
     pub peek_span: Span,
+
+    // FIXME (Issue #16472): This field should go away after ToToken impls
+    // are revised to go directly to token-trees.
+    /// Is \x00<name>,<ctxt>\x00 interpreted as encoded ast::Ident?
+    read_embedded_ident: bool,
 }
 
 impl<'a> Reader for StringReader<'a> {
@@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> {
     }
 }
 
+// FIXME (Issue #16472): This function should go away after
+// ToToken impls are revised to go directly to token-trees.
+pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler,
+                                            filemap: Rc<codemap::FileMap>)
+                                            -> StringReader<'b> {
+    let mut sr = StringReader::new_raw(span_diagnostic, filemap);
+    sr.read_embedded_ident = true;
+    sr.advance_token();
+    sr
+}
+
 impl<'a> StringReader<'a> {
     /// For comments.rs, which hackily pokes into pos and curr
     pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
@@ -120,6 +138,7 @@ impl<'a> StringReader<'a> {
             /* dummy values; not read */
             peek_tok: token::EOF,
             peek_span: codemap::DUMMY_SP,
+            read_embedded_ident: false,
         };
         sr.bump();
         sr
@@ -512,6 +531,81 @@ impl<'a> StringReader<'a> {
         })
     }
 
+    // FIXME (Issue #16472): The scan_embedded_hygienic_ident function
+    // should go away after we revise the syntax::ext::quote::ToToken
+    // impls to go directly to token-trees instead of thing -> string
+    // -> token-trees.  (The function is currently used to resolve
+    // Issues #15750 and #15962.)
+    //
+    // Since this function is only used for certain internal macros,
+    // and the functionality it provides is not exposed to end user
+    // programs, pnkfelix deliberately chose to write it in a way that
+    // favors rustc debugging effectiveness over runtime efficiency.
+
+    /// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00
+    /// where: `NNNNNN` is a string of characters forming an integer
+    /// (the name) and `CCCCCCC` is a string of characters forming an
+    /// integer (the ctxt), separated by a comma and delimited by a
+    /// `\x00` marker.
+    #[inline(never)]
+    fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident {
+        fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>,
+                                               c: char,
+                                               described_c: D,
+                                               where: &str) {
+            match r.curr {
+                Some(r_c) if r_c == c => r.bump(),
+                Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where),
+                None      => fail!("expected {}, hit EOF, {}", described_c, where),
+            }
+        }
+
+        let where = "while scanning embedded hygienic ident";
+
+        // skip over the leading `\x00`
+        bump_expecting_char(self, '\x00', "nul-byte", where);
+
+        // skip over the "name_"
+        for c in "name_".chars() {
+            bump_expecting_char(self, c, c, where);
+        }
+
+        let start_bpos = self.last_pos;
+        let base = 10;
+
+        // find the integer representing the name
+        self.scan_digits(base);
+        let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
+            num::from_str_radix(s, 10).unwrap_or_else(|| {
+                fail!("expected digits representing a name, got `{}`, {}, range [{},{}]",
+                      s, where, start_bpos, self.last_pos);
+            })
+        });
+
+        // skip over the `,`
+        bump_expecting_char(self, ',', "comma", where);
+
+        // skip over the "ctxt_"
+        for c in "ctxt_".chars() {
+            bump_expecting_char(self, c, c, where);
+        }
+
+        // find the integer representing the ctxt
+        let start_bpos = self.last_pos;
+        self.scan_digits(base);
+        let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
+            num::from_str_radix(s, 10).unwrap_or_else(|| {
+                fail!("expected digits representing a ctxt, got `{}`, {}", s, where);
+            })
+        });
+
+        // skip over the `\x00`
+        bump_expecting_char(self, '\x00', "nul-byte", where);
+
+        ast::Ident { name: ast::Name(encoded_name),
+                     ctxt: encoded_ctxt, }
+    }
+
     /// Scan through any digits (base `radix`) or underscores, and return how
     /// many digits there were.
     fn scan_digits(&mut self, radix: uint) -> uint {
@@ -839,6 +933,17 @@ impl<'a> StringReader<'a> {
             return self.scan_number(c.unwrap());
         }
 
+        if self.read_embedded_ident {
+            match (c.unwrap(), self.nextch(), self.nextnextch()) {
+                ('\x00', Some('n'), Some('a')) => {
+                    let ast_ident = self.scan_embedded_hygienic_ident();
+                    let is_mod_name = self.curr_is(':') && self.nextch_is(':');
+                    return token::IDENT(ast_ident, is_mod_name);
+                }
+                _ => {}
+            }
+        }
+
         match c.expect("next_token_inner called at EOF") {
           // One-byte tokens.
           ';' => { self.bump(); return token::SEMI; }
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 5b70ed609d9..af1f296a6ca 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -144,6 +144,8 @@ pub fn parse_stmt_from_source_str(name: String,
     maybe_aborted(p.parse_stmt(attrs),p)
 }
 
+// Note: keep in sync with `with_hygiene::parse_tts_from_source_str`
+// until #16472 is resolved.
 pub fn parse_tts_from_source_str(name: String,
                                  source: String,
                                  cfg: ast::CrateConfig,
@@ -160,6 +162,8 @@ pub fn parse_tts_from_source_str(name: String,
     maybe_aborted(p.parse_all_token_trees(),p)
 }
 
+// Note: keep in sync with `with_hygiene::new_parser_from_source_str`
+// until #16472 is resolved.
 // Create a new parser from a source string
 pub fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
                                       cfg: ast::CrateConfig,
@@ -192,6 +196,8 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess,
     p
 }
 
+// Note: keep this in sync with `with_hygiene::filemap_to_parser` until
+// #16472 is resolved.
 /// Given a filemap and config, return a parser
 pub fn filemap_to_parser<'a>(sess: &'a ParseSess,
                              filemap: Rc<FileMap>,
@@ -248,6 +254,8 @@ pub fn string_to_filemap(sess: &ParseSess, source: String, path: String)
     sess.span_diagnostic.cm.new_filemap(path, source)
 }
 
+// Note: keep this in sync with `with_hygiene::filemap_to_tts` (apart
+// from the StringReader constructor), until #16472 is resolved.
 /// Given a filemap, produce a sequence of token-trees
 pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
     -> Vec<ast::TokenTree> {
@@ -267,6 +275,67 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
     Parser::new(sess, cfg, box trdr)
 }
 
+// FIXME (Issue #16472): The `with_hygiene` mod should go away after
+// ToToken impls are revised to go directly to token-trees.
+pub mod with_hygiene {
+    use ast;
+    use codemap::FileMap;
+    use parse::parser::Parser;
+    use std::rc::Rc;
+    use super::ParseSess;
+    use super::{maybe_aborted, string_to_filemap, tts_to_parser};
+
+    // Note: keep this in sync with `super::parse_tts_from_source_str` until
+    // #16472 is resolved.
+    pub fn parse_tts_from_source_str(name: String,
+                                     source: String,
+                                     cfg: ast::CrateConfig,
+                                     sess: &ParseSess) -> Vec<ast::TokenTree> {
+        let mut p = new_parser_from_source_str(
+            sess,
+            cfg,
+            name,
+            source
+        );
+        p.quote_depth += 1u;
+        // right now this is re-creating the token trees from ... token trees.
+        maybe_aborted(p.parse_all_token_trees(),p)
+    }
+
+    // Note: keep this in sync with `super::new_parser_from_source_str` until
+    // #16472 is resolved.
+    // Create a new parser from a source string
+    fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
+                                      cfg: ast::CrateConfig,
+                                      name: String,
+                                      source: String) -> Parser<'a> {
+        filemap_to_parser(sess, string_to_filemap(sess, source, name), cfg)
+    }
+
+    // Note: keep this in sync with `super::filemap_to_parser` until
+    // #16472 is resolved.
+    /// Given a filemap and config, return a parser
+    fn filemap_to_parser<'a>(sess: &'a ParseSess,
+                             filemap: Rc<FileMap>,
+                             cfg: ast::CrateConfig) -> Parser<'a> {
+        tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg)
+    }
+
+    // Note: keep this in sync with `super::filemap_to_tts` (apart from
+    // the StringReader constructor) until #16472 is resolved.
+    /// Given a filemap, produce a sequence of token-trees
+    fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
+                      -> Vec<ast::TokenTree> {
+        // it appears to me that the cfg doesn't matter here... indeed,
+        // parsing tt's probably shouldn't require a parser at all.
+        use make_reader = super::lexer::make_reader_with_embedded_idents;
+        let cfg = Vec::new();
+        let srdr = make_reader(&sess.span_diagnostic, filemap);
+        let mut p1 = Parser::new(sess, cfg, box srdr);
+        p1.parse_all_token_trees()
+    }
+}
+
 /// Abort if necessary
 pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T {
     p.abort_if_errors();
author	Felix S. Klock II <pnkfelix@pnkfx.org>	2014-08-01 17:11:53 +0200
committer	Felix S. Klock II <pnkfelix@pnkfx.org>	2014-08-13 17:40:15 +0200
commit	c3ce245ba68f62edfc5818f003b2b78a02ce5c03 (patch)
tree	0be1530cc0bd24d38a1f8f9d80d196e75be26a60 /src/libsyntax/parse
parent	9d554212de0398ac044e6d815da3bfb184831e77 (diff)
download	rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.tar.gz rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.zip