diff options
| author | Felix S. Klock II <pnkfelix@pnkfx.org> | 2014-08-01 17:11:53 +0200 |
|---|---|---|
| committer | Felix S. Klock II <pnkfelix@pnkfx.org> | 2014-08-13 17:40:15 +0200 |
| commit | c3ce245ba68f62edfc5818f003b2b78a02ce5c03 (patch) | |
| tree | 0be1530cc0bd24d38a1f8f9d80d196e75be26a60 /src/libsyntax/parse | |
| parent | 9d554212de0398ac044e6d815da3bfb184831e77 (diff) | |
| download | rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.tar.gz rust-c3ce245ba68f62edfc5818f003b2b78a02ce5c03.zip | |
quote_expr macro: embed Ident using special encoding that preserves hygiene.
This adds support to `quote_expr!` and friends for round-trip hygienic preservation of Ident. Here are the pieces of the puzzle: * adding a method for encoding Ident for re-reading into a token tree. * Support for reading such encoded Idents in the lexer. Note that one must peek ahead for MOD_SEP after scan_embedded_hygienic_ident. * To ensure that encoded Idents are only read when we are in the midst of expanding a `quote_expr` or similar, added a `read_embedded_ident` flag on `StringReader`. * pprust support for encoding Idents as (uint,uint) pairs (for hygiene).
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 105 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 69 |
2 files changed, 174 insertions, 0 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 625c03ec13d..e5275af5cca 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -17,7 +17,9 @@ use parse::token; use parse::token::{str_to_ident}; use std::char; +use std::fmt; use std::mem::replace; +use std::num; use std::rc::Rc; use std::str; @@ -55,6 +57,11 @@ pub struct StringReader<'a> { /* cached: */ pub peek_tok: token::Token, pub peek_span: Span, + + // FIXME (Issue #16472): This field should go away after ToToken impls + // are revised to go directly to token-trees. + /// Is \x00<name>,<ctxt>\x00 is interpreted as encoded ast::Ident? + read_embedded_ident: bool, } impl<'a> Reader for StringReader<'a> { @@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> { } } +// FIXME (Issue #16472): This function should go away after +// ToToken impls are revised to go directly to token-trees. +pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler, + filemap: Rc<codemap::FileMap>) + -> StringReader<'b> { + let mut sr = StringReader::new_raw(span_diagnostic, filemap); + sr.read_embedded_ident = true; + sr.advance_token(); + sr +} + impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into pos and curr pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler, @@ -120,6 +138,7 @@ impl<'a> StringReader<'a> { /* dummy values; not read */ peek_tok: token::EOF, peek_span: codemap::DUMMY_SP, + read_embedded_ident: false, }; sr.bump(); sr @@ -512,6 +531,81 @@ impl<'a> StringReader<'a> { }) } + // FIXME (Issue #16472): The scan_embedded_hygienic_ident function + // should go away after we revise the syntax::ext::quote::ToToken + // impls to go directly to token-trees instead of thing -> string + // -> token-trees. (The function is currently used to resolve + // Issues #15750 and #15962.) 
+ // + // Since this function is only used for certain internal macros, + // and the functionality it provides is not exposed to end user + // programs, pnkfelix deliberately chose to write it in a way that + // favors rustc debugging effectiveness over runtime efficiency. + + /// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00 + /// where: `NNNNNN` is a string of characters forming an integer + /// (the name) and `CCCCCCC` is a string of characters forming an + /// integer (the ctxt), separate by a comma and delimited by a + /// `\x00` marker. + #[inline(never)] + fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident { + fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>, + c: char, + described_c: D, + where: &str) { + match r.curr { + Some(r_c) if r_c == c => r.bump(), + Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where), + None => fail!("expected {}, hit EOF, {}", described_c, where), + } + } + + let where = "while scanning embedded hygienic ident"; + + // skip over the leading `\x00` + bump_expecting_char(self, '\x00', "nul-byte", where); + + // skip over the "name_" + for c in "name_".chars() { + bump_expecting_char(self, c, c, where); + } + + let start_bpos = self.last_pos; + let base = 10; + + // find the integer representing the name + self.scan_digits(base); + let encoded_name : u32 = self.with_str_from(start_bpos, |s| { + num::from_str_radix(s, 10).unwrap_or_else(|| { + fail!("expected digits representing a name, got `{}`, {}, range [{},{}]", + s, where, start_bpos, self.last_pos); + }) + }); + + // skip over the `,` + bump_expecting_char(self, ',', "comma", where); + + // skip over the "ctxt_" + for c in "ctxt_".chars() { + bump_expecting_char(self, c, c, where); + } + + // find the integer representing the ctxt + let start_bpos = self.last_pos; + self.scan_digits(base); + let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| { + num::from_str_radix(s, 10).unwrap_or_else(|| { + 
fail!("expected digits representing a ctxt, got `{}`, {}", s, where); + }) + }); + + // skip over the `\x00` + bump_expecting_char(self, '\x00', "nul-byte", where); + + ast::Ident { name: ast::Name(encoded_name), + ctxt: encoded_ctxt, } + } + /// Scan through any digits (base `radix`) or underscores, and return how /// many digits there were. fn scan_digits(&mut self, radix: uint) -> uint { @@ -839,6 +933,17 @@ impl<'a> StringReader<'a> { return self.scan_number(c.unwrap()); } + if self.read_embedded_ident { + match (c.unwrap(), self.nextch(), self.nextnextch()) { + ('\x00', Some('n'), Some('a')) => { + let ast_ident = self.scan_embedded_hygienic_ident(); + let is_mod_name = self.curr_is(':') && self.nextch_is(':'); + return token::IDENT(ast_ident, is_mod_name); + } + _ => {} + } + } + match c.expect("next_token_inner called at EOF") { // One-byte tokens. ';' => { self.bump(); return token::SEMI; } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 5b70ed609d9..af1f296a6ca 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -144,6 +144,8 @@ pub fn parse_stmt_from_source_str(name: String, maybe_aborted(p.parse_stmt(attrs),p) } +// Note: keep in sync with `with_hygiene::parse_tts_from_source_str` +// until #16472 is resolved. pub fn parse_tts_from_source_str(name: String, source: String, cfg: ast::CrateConfig, @@ -160,6 +162,8 @@ pub fn parse_tts_from_source_str(name: String, maybe_aborted(p.parse_all_token_trees(),p) } +// Note: keep in sync with `with_hygiene::new_parser_from_source_str` +// until #16472 is resolved. // Create a new parser from a source string pub fn new_parser_from_source_str<'a>(sess: &'a ParseSess, cfg: ast::CrateConfig, @@ -192,6 +196,8 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess, p } +// Note: keep this in sync with `with_hygiene::filemap_to_parser` until +// #16472 is resolved. 
/// Given a filemap and config, return a parser pub fn filemap_to_parser<'a>(sess: &'a ParseSess, filemap: Rc<FileMap>, @@ -248,6 +254,8 @@ pub fn string_to_filemap(sess: &ParseSess, source: String, path: String) sess.span_diagnostic.cm.new_filemap(path, source) } +// Note: keep this in sync with `with_hygiene::filemap_to_tts` (apart +// from the StringReader constructor), until #16472 is resolved. /// Given a filemap, produce a sequence of token-trees pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<ast::TokenTree> { @@ -267,6 +275,67 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess, Parser::new(sess, cfg, box trdr) } +// FIXME (Issue #16472): The `with_hygiene` mod should go away after +// ToToken impls are revised to go directly to token-trees. +pub mod with_hygiene { + use ast; + use codemap::FileMap; + use parse::parser::Parser; + use std::rc::Rc; + use super::ParseSess; + use super::{maybe_aborted, string_to_filemap, tts_to_parser}; + + // Note: keep this in sync with `super::parse_tts_from_source_str` until + // #16472 is resolved. + pub fn parse_tts_from_source_str(name: String, + source: String, + cfg: ast::CrateConfig, + sess: &ParseSess) -> Vec<ast::TokenTree> { + let mut p = new_parser_from_source_str( + sess, + cfg, + name, + source + ); + p.quote_depth += 1u; + // right now this is re-creating the token trees from ... token trees. + maybe_aborted(p.parse_all_token_trees(),p) + } + + // Note: keep this in sync with `super::new_parser_from_source_str` until + // #16472 is resolved. + // Create a new parser from a source string + fn new_parser_from_source_str<'a>(sess: &'a ParseSess, + cfg: ast::CrateConfig, + name: String, + source: String) -> Parser<'a> { + filemap_to_parser(sess, string_to_filemap(sess, source, name), cfg) + } + + // Note: keep this in sync with `super::filemap_to_parserr` until + // #16472 is resolved. 
+ /// Given a filemap and config, return a parser + fn filemap_to_parser<'a>(sess: &'a ParseSess, + filemap: Rc<FileMap>, + cfg: ast::CrateConfig) -> Parser<'a> { + tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg) + } + + // Note: keep this in sync with `super::filemap_to_tts` until + // #16472 is resolved. + /// Given a filemap, produce a sequence of token-trees + fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) + -> Vec<ast::TokenTree> { + // it appears to me that the cfg doesn't matter here... indeed, + // parsing tt's probably shouldn't require a parser at all. + use make_reader = super::lexer::make_reader_with_embedded_idents; + let cfg = Vec::new(); + let srdr = make_reader(&sess.span_diagnostic, filemap); + let mut p1 = Parser::new(sess, cfg, box srdr); + p1.parse_all_token_trees() + } +} + /// Abort if necessary pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T { p.abort_if_errors(); |
