diff options
| author | bors <bors@rust-lang.org> | 2013-05-20 12:04:47 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-05-20 12:04:47 -0700 |
| commit | 26babaafcdbcfdf2e842d84dbeabbed0dae6efef (patch) | |
| tree | 51ba2c1f043882643de9ca2957d9efab24335a91 | |
| parent | f3b458b5c5a555067e9d013066fac9c8de50c3f8 (diff) | |
| parent | b71a1ecea2de87cff3089f9f261be71cd314aac9 (diff) | |
| download | rust-26babaafcdbcfdf2e842d84dbeabbed0dae6efef.tar.gz rust-26babaafcdbcfdf2e842d84dbeabbed0dae6efef.zip | |
auto merge of #6559 : jbclements/rust/hygiene-fns-and-cleanup, r=jbclements
This includes new, tested, hygiene support functions. It also removes the interner_key! macro and replaces it with a function, which should be inline-able. It also contains some parser patch-ups and some docfixes. On my machine, this patch passes all tests.
| -rw-r--r-- | doc/rust.md | 28 | ||||
| -rw-r--r-- | src/librustdoc/extract.rs | 18 | ||||
| -rw-r--r-- | src/libsyntax/ast.rs | 36 | ||||
| -rw-r--r-- | src/libsyntax/ast_util.rs | 134 | ||||
| -rw-r--r-- | src/libsyntax/ext/expand.rs | 93 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 139 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 62 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 29 | ||||
| -rw-r--r-- | src/libsyntax/syntax.rc | 5 | ||||
| -rw-r--r-- | src/libsyntax/util/interner.rs | 16 | ||||
| -rw-r--r-- | src/libsyntax/util/parser_testing.rs | 60 | ||||
| -rw-r--r-- | src/test/compile-fail/attr-before-ext.rs | 4 | ||||
| -rw-r--r-- | src/test/compile-fail/attr-before-let.rs | 4 |
14 files changed, 359 insertions, 271 deletions
diff --git a/doc/rust.md b/doc/rust.md index 9839e9e8afd..a115f41ae86 100644 --- a/doc/rust.md +++ b/doc/rust.md @@ -2250,6 +2250,14 @@ do_expr : "do" expr [ '|' ident_list '|' ] ? '{' block '}' ; A _do expression_ provides a more-familiar block-syntax for a [lambda expression](#lambda-expressions), including a special translation of [return expressions](#return-expressions) inside the supplied block. +Any occurrence of a [return expression](#return-expressions) +inside this `block` expression is rewritten +as a reference to an (anonymous) flag set in the caller's environment, +which is checked on return from the `expr` and, if set, +causes a corresponding return from the caller. +In this way, the meaning of `return` statements in language built-in control blocks is preserved, +if they are rewritten using lambda functions and `do` expressions as abstractions. + The optional `ident_list` and `block` provided in a `do` expression are parsed as though they constitute a lambda expression; if the `ident_list` is missing, an empty `ident_list` is implied. @@ -2296,19 +2304,15 @@ A _for expression_ is similar to a [`do` expression](#do-expressions), in that it provides a special block-form of lambda expression, suited to passing the `block` function to a higher-order function implementing a loop. -Like a `do` expression, a `return` expression inside a `for` expresison is rewritten, -to access a local flag that causes an early return in the caller. - -Additionally, any occurrence of a [return expression](#return-expressions) -inside the `block` of a `for` expression is rewritten -as a reference to an (anonymous) flag set in the caller's environment, -which is checked on return from the `expr` and, if set, -causes a corresponding return from the caller. -In this way, the meaning of `return` statements in language built-in control blocks is preserved, -if they are rewritten using lambda functions and `do` expressions as abstractions. +In contrast to a `do` expression, a `for` expression is designed to work +with methods such as `each` and `times`, that require the body block to +return a boolean. The `for` expression accommodates this by implicitly +returning `true` at the end of each block, unless a `break` expression +is evaluated. -Like `return` expressions, any [`break`](#break-expressions) and [`loop`](#loop-expressions) expressions -are rewritten inside `for` expressions, with a combination of local flag variables, +In addition, [`break`](#break-expressions) and [`loop`](#loop-expressions) expressions +are rewritten inside `for` expressions in the same way that `return` expressions are, +with a combination of local flag variables, and early boolean-valued returns from the `block` function, such that the meaning of `break` and `loop` is preserved in a primitive loop when rewritten as a `for` loop controlled by a higher order function. diff --git a/src/librustdoc/extract.rs b/src/librustdoc/extract.rs index 0c49d457ad8..e3da6e6844e 100644 --- a/src/librustdoc/extract.rs +++ b/src/librustdoc/extract.rs @@ -17,24 +17,18 @@ use doc; use core::local_data::local_data_get; use syntax::ast; use syntax; - -/* can't import macros yet, so this is copied from token.rs. See its comment - * there. */ -macro_rules! interner_key ( - () => (cast::transmute::<(uint, uint), - &fn(v: @@syntax::parse::token::ident_interner)>((-3 as uint, 0u))) -) +use syntax::parse::token::{ident_interner}; +use syntax::parse::token; // Hack; rather than thread an interner through everywhere, rely on // thread-local data pub fn to_str(id: ast::ident) -> ~str { - let intr = unsafe{ local_data_get(interner_key!()) }; - - return copy *(*intr.get()).get(id); + let intr = token::get_ident_interner(); + return copy *(*intr).get(id); } -pub fn interner() -> @syntax::parse::token::ident_interner { - return *(unsafe{ local_data_get(interner_key!()) }).get(); +pub fn interner() -> @ident_interner { + return token::get_ident_interner(); } pub fn from_srv( diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 241450e7130..a8ff8dab54c 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -13,12 +13,14 @@ use codemap::{span, spanned}; use abi::AbiSet; use opt_vec::OptVec; +use parse::token::get_ident_interner; use core::cast; -use core::option::{None, Option, Some}; +use core::option::{Option}; use core::to_bytes; use core::to_bytes::IterBytes; use core::to_str::ToStr; +use core::hashmap::HashMap; use std::serialize::{Encodable, Decodable, Encoder, Decoder}; @@ -38,14 +40,20 @@ pub struct ident { repr: Name, ctxt: SyntaxContext } // that's causing unreleased memory to cause core dumps // and also perhaps to save some work in destructor checks. // the special uint '0' will be used to indicate an empty -// syntax context +// syntax context. // this uint is a reference to a table stored in thread-local // storage. pub type SyntaxContext = uint; -pub type SCTable = ~[SyntaxContext_]; +pub struct SCTable { + table : ~[SyntaxContext_], + mark_memo : HashMap<(SyntaxContext,Mrk),SyntaxContext>, + rename_memo : HashMap<(SyntaxContext,ident,Name),SyntaxContext> +} +// NB: these must be placed in any SCTable... pub static empty_ctxt : uint = 0; +pub static illegal_ctxt : uint = 1; #[deriving(Eq, Encodable, Decodable)] pub enum SyntaxContext_ { @@ -59,7 +67,8 @@ pub enum SyntaxContext_ { // "to" slot must have the same name and context // in the "from" slot. In essence, they're all // pointers to a single "rename" event node. - Rename (ident,Name,SyntaxContext) + Rename (ident,Name,SyntaxContext), + IllegalCtxt() } // a name represents an identifier @@ -70,27 +79,14 @@ pub type Mrk = uint; impl<S:Encoder> Encodable<S> for ident { fn encode(&self, s: &mut S) { - unsafe { - let intr = - match local_data::local_data_get(interner_key!()) { - None => fail!("encode: TLS interner not set up"), - Some(intr) => intr - }; - - s.emit_str(*(*intr).get(*self)); - } + let intr = get_ident_interner(); + s.emit_str(*(*intr).get(*self)); } } impl<D:Decoder> Decodable<D> for ident { fn decode(d: &mut D) -> ident { - let intr = match unsafe { - local_data::local_data_get(interner_key!()) - } { - None => fail!("decode: TLS interner not set up"), - Some(intr) => intr - }; - + let intr = get_ident_interner(); (*intr).intern(d.read_str()) } } diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs index bb6d13b2831..eaacb054ca1 100644 --- a/src/libsyntax/ast_util.rs +++ b/src/libsyntax/ast_util.rs @@ -15,6 +15,7 @@ use codemap::{span, spanned}; use parse::token; use visit; use opt_vec; +use core::hashmap::HashMap; use core::to_bytes; @@ -577,22 +578,61 @@ pub enum Privacy { // HYGIENE FUNCTIONS /// Construct an identifier with the given repr and an empty context: -pub fn mk_ident(repr: uint) -> ident { ident {repr: repr, ctxt: 0}} +pub fn new_ident(repr: uint) -> ident { ident {repr: repr, ctxt: 0}} /// Extend a syntax context with a given mark -pub fn mk_mark (m:Mrk,ctxt:SyntaxContext,table:&mut SCTable) +pub fn new_mark (m:Mrk, tail:SyntaxContext,table:&mut SCTable) -> SyntaxContext { - idx_push(table,Mark(m,ctxt)) + let key = (tail,m); + // FIXME #5074 : can't use more natural style because we're missing + // flow-sensitivity. Results in two lookups on a hash table hit. + // also applies to new_rename, below. + // let try_lookup = table.mark_memo.find(&key); + match table.mark_memo.contains_key(&key) { + false => { + let new_idx = idx_push(&mut table.table,Mark(m,tail)); + table.mark_memo.insert(key,new_idx); + new_idx + } + true => { + match table.mark_memo.find(&key) { + None => fail!(~"internal error: key disappeared 2013042901"), + Some(idxptr) => {*idxptr} + } + } + } } /// Extend a syntax context with a given rename -pub fn mk_rename (id:ident, to:Name, tail:SyntaxContext, table: &mut SCTable) +pub fn new_rename (id:ident, to:Name, tail:SyntaxContext, table: &mut SCTable) -> SyntaxContext { - idx_push(table,Rename(id,to,tail)) + let key = (tail,id,to); + // FIXME #5074 + //let try_lookup = table.rename_memo.find(&key); + match table.rename_memo.contains_key(&key) { + false => { + let new_idx = idx_push(&mut table.table,Rename(id,to,tail)); + table.rename_memo.insert(key,new_idx); + new_idx + } + true => { + match table.rename_memo.find(&key) { + None => fail!(~"internal error: key disappeared 2013042902"), + Some(idxptr) => {*idxptr} + } + } + } } /// Make a fresh syntax context table with EmptyCtxt in slot zero -pub fn mk_sctable() -> SCTable { ~[EmptyCtxt] } +/// and IllegalCtxt in slot one. +pub fn new_sctable() -> SCTable { + SCTable { + table: ~[EmptyCtxt,IllegalCtxt], + mark_memo: HashMap::new(), + rename_memo: HashMap::new() + } +} /// Add a value to the end of a vec, return its index fn idx_push<T>(vec: &mut ~[T], val: T) -> uint { @@ -601,8 +641,8 @@ fn idx_push<T>(vec: &mut ~[T], val: T) -> uint { } /// Resolve a syntax object to a name, per MTWT. -pub fn resolve (id : ident, table : &SCTable) -> Name { - match table[id.ctxt] { +pub fn resolve (id : ident, table : &mut SCTable) -> Name { + match table.table[id.ctxt] { EmptyCtxt => id.repr, // ignore marks here: Mark(_,subctxt) => resolve (ident{repr:id.repr, ctxt: subctxt},table), @@ -619,6 +659,7 @@ pub fn resolve (id : ident, table : &SCTable) -> Name { resolvedthis } } + IllegalCtxt() => fail!(~"expected resolvable context, got IllegalCtxt") } } @@ -629,7 +670,7 @@ pub fn marksof(ctxt: SyntaxContext, stopname: Name, table: &SCTable) -> ~[Mrk] { let mut result = ~[]; let mut loopvar = ctxt; loop { - match table[loopvar] { + match table.table[loopvar] { EmptyCtxt => {return result;}, Mark(mark,tl) => { xorPush(&mut result,mark); @@ -644,6 +685,7 @@ pub fn marksof(ctxt: SyntaxContext, stopname: Name, table: &SCTable) -> ~[Mrk] { loopvar = tl; } } + IllegalCtxt => fail!(~"expected resolvable context, got IllegalCtxt") } } } @@ -713,15 +755,15 @@ mod test { -> SyntaxContext { tscs.foldr(tail, |tsc : &TestSC,tail : SyntaxContext| {match *tsc { - M(mrk) => mk_mark(mrk,tail,table), - R(ident,name) => mk_rename(ident,name,tail,table)}}) + M(mrk) => new_mark(mrk,tail,table), + R(ident,name) => new_rename(ident,name,tail,table)}}) } // gather a SyntaxContext back into a vector of TestSCs fn refold_test_sc(mut sc: SyntaxContext, table : &SCTable) -> ~[TestSC] { let mut result = ~[]; loop { - match table[sc] { + match table.table[sc] { EmptyCtxt => {return result;}, Mark(mrk,tail) => { result.push(M(mrk)); @@ -733,40 +775,41 @@ mod test { sc = tail; loop; } + IllegalCtxt => fail!("expected resolvable context, got IllegalCtxt") } } } #[test] fn test_unfold_refold(){ - let mut t = mk_sctable(); + let mut t = new_sctable(); let test_sc = ~[M(3),R(id(101,0),14),M(9)]; - assert_eq!(unfold_test_sc(copy test_sc,empty_ctxt,&mut t),3); - assert_eq!(t[1],Mark(9,0)); - assert_eq!(t[2],Rename(id(101,0),14,1)); - assert_eq!(t[3],Mark(3,2)); - assert_eq!(refold_test_sc(3,&t),test_sc); + assert_eq!(unfold_test_sc(copy test_sc,empty_ctxt,&mut t),4); + assert_eq!(t.table[2],Mark(9,0)); + assert_eq!(t.table[3],Rename(id(101,0),14,2)); + assert_eq!(t.table[4],Mark(3,3)); + assert_eq!(refold_test_sc(4,&t),test_sc); } // extend a syntax context with a sequence of marks given // in a vector. v[0] will be the outermost mark. fn unfold_marks(mrks:~[Mrk],tail:SyntaxContext,table: &mut SCTable) -> SyntaxContext { mrks.foldr(tail, |mrk:&Mrk,tail:SyntaxContext| - {mk_mark(*mrk,tail,table)}) + {new_mark(*mrk,tail,table)}) } #[test] fn unfold_marks_test() { - let mut t = ~[EmptyCtxt]; + let mut t = new_sctable(); - assert_eq!(unfold_marks(~[3,7],empty_ctxt,&mut t),2); - assert_eq!(t[1],Mark(7,0)); - assert_eq!(t[2],Mark(3,1)); + assert_eq!(unfold_marks(~[3,7],empty_ctxt,&mut t),3); + assert_eq!(t.table[2],Mark(7,0)); + assert_eq!(t.table[3],Mark(3,2)); } #[test] fn test_marksof () { let stopname = 242; let name1 = 243; - let mut t = mk_sctable(); + let mut t = new_sctable(); assert_eq!(marksof (empty_ctxt,stopname,&t),~[]); // FIXME #5074: ANF'd to dodge nested calls { let ans = unfold_marks(~[4,98],empty_ctxt,&mut t); @@ -780,13 +823,13 @@ mod test { // rename where stop doesn't match: { let chain = ~[M(9), R(id(name1, - mk_mark (4, empty_ctxt,&mut t)), + new_mark (4, empty_ctxt,&mut t)), 100101102), M(14)]; let ans = unfold_test_sc(chain,empty_ctxt,&mut t); assert_eq! (marksof (ans, stopname, &t), ~[9,14]);} // rename where stop does match - { let name1sc = mk_mark(4, empty_ctxt, &mut t); + { let name1sc = new_mark(4, empty_ctxt, &mut t); let chain = ~[M(9), R(id(name1, name1sc), stopname), @@ -798,30 +841,30 @@ mod test { #[test] fn resolve_tests () { let a = 40; - let mut t = mk_sctable(); + let mut t = new_sctable(); // - ctxt is MT - assert_eq!(resolve(id(a,empty_ctxt),&t),a); + assert_eq!(resolve(id(a,empty_ctxt),&mut t),a); // - simple ignored marks { let sc = unfold_marks(~[1,2,3],empty_ctxt,&mut t); - assert_eq!(resolve(id(a,sc),&t),a);} + assert_eq!(resolve(id(a,sc),&mut t),a);} // - orthogonal rename where names don't match { let sc = unfold_test_sc(~[R(id(50,empty_ctxt),51),M(12)],empty_ctxt,&mut t); - assert_eq!(resolve(id(a,sc),&t),a);} + assert_eq!(resolve(id(a,sc),&mut t),a);} // - rename where names do match, but marks don't - { let sc1 = mk_mark(1,empty_ctxt,&mut t); + { let sc1 = new_mark(1,empty_ctxt,&mut t); let sc = unfold_test_sc(~[R(id(a,sc1),50), M(1), M(2)], empty_ctxt,&mut t); - assert_eq!(resolve(id(a,sc),&t), a);} + assert_eq!(resolve(id(a,sc),&mut t), a);} // - rename where names and marks match { let sc1 = unfold_test_sc(~[M(1),M(2)],empty_ctxt,&mut t); let sc = unfold_test_sc(~[R(id(a,sc1),50),M(1),M(2)],empty_ctxt,&mut t); - assert_eq!(resolve(id(a,sc),&t), 50); } + assert_eq!(resolve(id(a,sc),&mut t), 50); } // - rename where names and marks match by literal sharing { let sc1 = unfold_test_sc(~[M(1),M(2)],empty_ctxt,&mut t); let sc = unfold_test_sc(~[R(id(a,sc1),50)],sc1,&mut t); - assert_eq!(resolve(id(a,sc),&t), 50); } + assert_eq!(resolve(id(a,sc),&mut t), 50); } // - two renames of the same var.. can only happen if you use // local-expand to prevent the inner binding from being renamed // during the rename-pass caused by the first: @@ -829,20 +872,29 @@ mod test { { let sc = unfold_test_sc(~[R(id(a,empty_ctxt),50), R(id(a,empty_ctxt),51)], empty_ctxt,&mut t); - assert_eq!(resolve(id(a,sc),&t), 51); } + assert_eq!(resolve(id(a,sc),&mut t), 51); } // the simplest double-rename: - { let a_to_a50 = mk_rename(id(a,empty_ctxt),50,empty_ctxt,&mut t); - let a50_to_a51 = mk_rename(id(a,a_to_a50),51,a_to_a50,&mut t); - assert_eq!(resolve(id(a,a50_to_a51),&t),51); + { let a_to_a50 = new_rename(id(a,empty_ctxt),50,empty_ctxt,&mut t); + let a50_to_a51 = new_rename(id(a,a_to_a50),51,a_to_a50,&mut t); + assert_eq!(resolve(id(a,a50_to_a51),&mut t),51); // mark on the outside doesn't stop rename: - let sc = mk_mark(9,a50_to_a51,&mut t); - assert_eq!(resolve(id(a,sc),&t),51); + let sc = new_mark(9,a50_to_a51,&mut t); + assert_eq!(resolve(id(a,sc),&mut t),51); // but mark on the inside does: let a50_to_a51_b = unfold_test_sc(~[R(id(a,a_to_a50),51), M(9)], a_to_a50, &mut t); - assert_eq!(resolve(id(a,a50_to_a51_b),&t),50);} + assert_eq!(resolve(id(a,a50_to_a51_b),&mut t),50);} + } + + #[test] fn hashing_tests () { + let mut t = new_sctable(); + assert_eq!(new_mark(12,empty_ctxt,&mut t),2); + assert_eq!(new_mark(13,empty_ctxt,&mut t),3); + // using the same one again should result in the same index: + assert_eq!(new_mark(12,empty_ctxt,&mut t),2); + // I'm assuming that the rename table will behave the same.... } } diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index f9ca84473fb..db95cb65b76 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -11,7 +11,9 @@ use ast::{blk_, attribute_, attr_outer, meta_word}; use ast::{crate, expr_, expr_mac, mac_invoc_tt}; use ast::{item_mac, stmt_, stmt_mac, stmt_expr, stmt_semi}; +use ast::{SCTable, illegal_ctxt}; use ast; +use ast_util::{new_rename, new_mark, resolve}; use attr; use codemap; use codemap::{span, CallInfo, ExpandedFrom, NameAndSpan, spanned}; @@ -635,62 +637,65 @@ pub fn expand_crate(parse_sess: @mut parse::ParseSess, @f.fold_crate(&*c) } -// given a function from paths to paths, produce +// given a function from idents to idents, produce // an ast_fold that applies that function: -fn fun_to_path_folder(f: @fn(&ast::Path)->ast::Path) -> @ast_fold{ +pub fn fun_to_ident_folder(f: @fn(ast::ident)->ast::ident) -> @ast_fold{ let afp = default_ast_fold(); let f_pre = @AstFoldFns{ - fold_path : |p, _| f(p), + fold_ident : |id, _| f(id), .. *afp }; make_fold(f_pre) } -/* going to have to figure out whether the table is passed in or -extracted from TLS... + // update the ctxts in a path to get a rename node -fn ctxt_update_rename(from: ast::Name, - fromctx: ast::SyntaxContext, to: ast::Name) -> - @fn(&ast::Path,@ast_fold)->ast::Path { - return |p:&ast::Path,_| - ast::Path {span: p.span, - global: p.global, - idents: p.idents.map(|id| - ast::ident{ - repr: id.repr, - // this needs to be cached.... - ctxt: Some(@ast::Rename(from,fromctx, - to,id.ctxt)) - }), - rp: p.rp, - types: p.types}; +pub fn new_ident_renamer(from: ast::ident, + to: ast::Name, + table: @mut SCTable) -> + @fn(ast::ident)->ast::ident { + |id : ast::ident| + ast::ident{ + repr: id.repr, + ctxt: new_rename(from,to,id.ctxt,table) + } } + // update the ctxts in a path to get a mark node -fn ctxt_update_mark(mark: uint) -> - @fn(&ast::Path,@ast_fold)->ast::Path { - return |p:&ast::Path,_| - ast::Path {span: p.span, - global: p.global, - idents: p.idents.map(|id| - ast::ident{ - repr: id.repr, - // this needs to be cached.... - ctxt: Some(@ast::Mark(mark,id.ctxt)) - }), - rp: p.rp, - types: p.types}; +pub fn new_ident_marker(mark: uint, + table: @mut SCTable) -> + @fn(ast::ident)->ast::ident { + |id : ast::ident| + ast::ident{ + repr: id.repr, + ctxt: new_mark(mark,id.ctxt,table) + } } -*/ + +// perform resolution (in the MTWT sense) on all of the +// idents in the tree. This is the final step in expansion. +pub fn new_ident_resolver(table: @mut SCTable) -> + @fn(ast::ident)->ast::ident { + |id : ast::ident| + ast::ident { + repr : resolve(id,table), + ctxt : illegal_ctxt + } +} + #[cfg(test)] mod test { use super::*; use ast; - use ast::{attribute_, attr_outer, meta_word}; + use ast::{attribute_, attr_outer, meta_word, empty_ctxt}; + use ast_util::{new_sctable}; use codemap; use codemap::spanned; use parse; + use core::io; use core::option::{None, Some}; + use util::parser_testing::{string_to_item_and_sess}; // make sure that fail! is present #[test] fn fail_exists_test () { @@ -792,4 +797,22 @@ mod test { } } + #[test] + fn renaming () { + let (maybe_item_ast,sess) = string_to_item_and_sess(@~"fn a() -> int { let b = 13; b} "); + let item_ast = match maybe_item_ast { + Some(x) => x, + None => fail!("test case fail") + }; + let table = @mut new_sctable(); + let a_name = 100; // enforced by testing_interner + let a2_name = sess.interner.gensym("a2").repr; + let renamer = new_ident_renamer(ast::ident{repr:a_name,ctxt:empty_ctxt}, + a2_name,table); + let renamed_ast = fun_to_ident_folder(renamer).fold_item(item_ast).get(); + let resolver = new_ident_resolver(table); + let resolved_ast = fun_to_ident_folder(resolver).fold_item(renamed_ast).get(); + io::print(fmt!("ast: %?\n",resolved_ast)) + } + } diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 764dec0eeb3..5340293bb02 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -786,7 +786,7 @@ mod test { fn setup(teststr: ~str) -> Env { let cm = CodeMap::new(); let fm = cm.new_filemap(~"zebra.rs", @teststr); - let ident_interner = token::mk_ident_interner(); // interner::mk(); + let ident_interner = token::get_ident_interner(); let span_handler = diagnostic::mk_span_handler(diagnostic::mk_handler(None),@cm); Env { diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index fa52f3dde3d..8eb7ca2923c 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -19,7 +19,7 @@ use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter}; use parse::attr::parser_attr; use parse::lexer::reader; use parse::parser::Parser; -use parse::token::{ident_interner, mk_ident_interner}; +use parse::token::{ident_interner, get_ident_interner}; use core::io; use core::option::{None, Option, Some}; @@ -59,7 +59,7 @@ pub fn new_parse_sess(demitter: Option<Emitter>) -> @mut ParseSess { cm: cm, next_id: 1, span_diagnostic: mk_span_handler(mk_handler(demitter), cm), - interner: mk_ident_interner(), + interner: get_ident_interner(), } } @@ -70,7 +70,7 @@ pub fn new_parse_sess_special_handler(sh: @span_handler, cm: cm, next_id: 1, span_diagnostic: sh, - interner: mk_ident_interner(), + interner: get_ident_interner(), } } @@ -346,76 +346,24 @@ mod test { use std::serialize::Encodable; use std; use core::io; - use core::option::Option; use core::option::Some; use core::option::None; - use core::int; - use core::num::NumCast; - use codemap::{CodeMap, span, BytePos, spanned}; + use codemap::{span, BytePos, spanned}; use opt_vec; use ast; use abi; - use ast_util::mk_ident; use parse::parser::Parser; - use parse::token::{ident_interner, mk_fresh_ident_interner}; - use diagnostic::{mk_span_handler, mk_handler}; - - // add known names to interner for testing - fn mk_testing_interner() -> @ident_interner { - let i = mk_fresh_ident_interner(); - // baby hack; in order to put the identifiers - // 'a' and 'b' at known locations, we're going - // to fill up the interner to length 100. If - // the # of preloaded items on the interner - // ever gets larger than 100, we'll have to - // adjust this number (say, to 200) and - // change the numbers in the identifier - // test cases below. - - assert!(i.len() < 100); - for int::range(0,100-((i.len()).to_int())) |_dc| { - i.gensym("dontcare"); - } - i.intern("a"); - i.intern("b"); - i.intern("c"); - i.intern("d"); - i.intern("return"); - assert_eq!(i.get(ast::ident{repr:101,ctxt:0}), @~"b"); - i - } - - // make a parse_sess that's closed over a - // testing interner (where a -> 100, b -> 101) - fn mk_testing_parse_sess() -> @mut ParseSess { - let interner = mk_testing_interner(); - let cm = @CodeMap::new(); - @mut ParseSess { - cm: cm, - next_id: 1, - span_diagnostic: mk_span_handler(mk_handler(None), cm), - interner: interner, - } - } - - // map a string to tts, using a made-up filename: return both the token_trees - // and the ParseSess - fn string_to_tts_t (source_str : @~str) -> (~[ast::token_tree],@mut ParseSess) { - let ps = mk_testing_parse_sess(); - (filemap_to_tts(ps,string_to_filemap(ps,source_str,~"bogofile")),ps) - } + use parse::token::intern; + use util::parser_testing::{string_to_tts_and_sess,string_to_parser}; + use util::parser_testing::{string_to_expr, string_to_item}; + use util::parser_testing::{string_to_stmt}; // map a string to tts, return the tt without its parsesess fn string_to_tts_only(source_str : @~str) -> ~[ast::token_tree] { - let (tts,_ps) = string_to_tts_t(source_str); + let (tts,_ps) = string_to_tts_and_sess(source_str); tts } - // map string to parser (via tts) - fn string_to_parser(source_str: @~str) -> Parser { - let ps = mk_testing_parse_sess(); - new_parser_from_source_str(ps,~[],~"bogofile",source_str) - } #[cfg(test)] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str { do io::with_str_writer |writer| { @@ -424,30 +372,14 @@ mod test { } } - fn string_to_crate (source_str : @~str) -> @ast::crate { - string_to_parser(source_str).parse_crate_mod() - } - - fn string_to_expr (source_str : @~str) -> @ast::expr { - string_to_parser(source_str).parse_expr() - } - - fn string_to_item (source_str : @~str) -> Option<@ast::item> { - string_to_parser(source_str).parse_item(~[]) - } - - fn string_to_stmt (source_str : @~str) -> @ast::stmt { - string_to_parser(source_str).parse_stmt(~[]) - } - // produce a codemap::span fn sp (a: uint, b: uint) -> span { span{lo:BytePos(a),hi:BytePos(b),expn_info:None} } // convert a vector of uints to a vector of ast::idents - fn ints_to_idents(ids: ~[uint]) -> ~[ast::ident] { - ids.map(|u| mk_ident(*u)) + fn ints_to_idents(ids: ~[~str]) -> ~[ast::ident] { + ids.map(|u| intern(*u)) } #[test] fn path_exprs_1 () { @@ -456,7 +388,7 @@ mod test { callee_id:2, node:ast::expr_path(@ast::Path {span:sp(0,1), global:false, - idents:~[mk_ident(100)], + idents:~[intern("a")], rp:None, types:~[]}), span:sp(0,1)}) @@ -466,11 +398,12 @@ mod test { assert_eq!(string_to_expr(@~"::a::b"), @ast::expr{id:1, callee_id:2, - node:ast::expr_path(@ast::Path {span:sp(0,6), - global:true, - idents:ints_to_idents(~[100,101]), - rp:None, - types:~[]}), + node:ast::expr_path( + @ast::Path {span:sp(0,6), + global:true, + idents:ints_to_idents(~[~"a",~"b"]), + rp:None, + types:~[]}), span:sp(0,6)}) } @@ -482,7 +415,7 @@ mod test { }*/ #[test] fn string_to_tts_1 () { - let (tts,_ps) = string_to_tts_t(@~"fn a (b : int) { b; }"); + let (tts,_ps) = string_to_tts_and_sess(@~"fn a (b : int) { b; }"); assert_eq!(to_json_str(@tts), ~"[\ [\"tt_tok\",null,[\"IDENT\",\"fn\",false]],\ @@ -519,7 +452,7 @@ mod test { node:ast::expr_path( @ast::Path{span:sp(7,8), global:false, - idents:~[mk_ident(103)], + idents:~[intern("d")], rp:None, types:~[] }), @@ -537,7 +470,7 @@ mod test { @ast::Path{ span:sp(0,1), global:false, - idents:~[mk_ident(101)], + idents:~[intern("b")], rp:None, types: ~[]}), span: sp(0,1)}, @@ -558,7 +491,7 @@ mod test { @ast::Path{ span:sp(0,1), global:false, - idents:~[mk_ident(101)], + idents:~[intern("b")], rp: None, types: ~[]}, None // no idea @@ -577,7 +510,7 @@ mod test { span:sp(4,4), // this is bizarre... // check this in the original parser? global:false, - idents:~[mk_ident(105)], + idents:~[intern("int")], rp: None, types: ~[]}, 2), @@ -587,7 +520,7 @@ mod test { @ast::Path{ span:sp(0,1), global:false, - idents:~[mk_ident(101)], + idents:~[intern("b")], rp: None, types: ~[]}, None // no idea @@ -603,7 +536,7 @@ mod test { // assignment order of the node_ids. assert_eq!(string_to_item(@~"fn a (b : int) { b; }"), Some( - @ast::item{ident:mk_ident(100), + @ast::item{ident:intern("a"), attrs:~[], id: 10, // fixme node: ast::item_fn(ast::fn_decl{ @@ -613,7 +546,7 @@ mod test { node: ast::ty_path(@ast::Path{ span:sp(10,13), global:false, - idents:~[mk_ident(106)], + idents:~[intern("int")], rp: None, types: ~[]}, 2), @@ -624,7 +557,7 @@ mod test { @ast::Path{ span:sp(6,7), global:false, - idents:~[mk_ident(101)], + idents:~[intern("b")], rp: None, types: ~[]}, None // no idea @@ -655,7 +588,7 @@ mod test { @ast::Path{ span:sp(17,18), global:false, - idents:~[mk_ident(101)], + idents:~[intern("b")], rp:None, types: ~[]}), span: sp(17,18)}, @@ -675,4 +608,20 @@ mod test { string_to_expr(@~"3 + 4"); string_to_expr(@~"a::z.froob(b,@(987+3))"); } + + #[test] fn attrs_fix_bug () { + string_to_item(@~"pub fn mk_file_writer(path: &Path, flags: &[FileFlag]) + -> Result<@Writer, ~str> { + #[cfg(windows)] + fn wb() -> c_int { + (O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int + } + + #[cfg(unix)] + fn wb() -> c_int { O_WRONLY as c_int } + + let mut fflags: c_int = wb(); +}"); + } + } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index cfef9c49879..fddeea93024 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -2496,6 +2496,7 @@ pub impl Parser { @ast::pat { id: self.get_id(), node: pat, span: mk_sp(lo, hi) } } + // parse ident or ident @ pat // used by the copy foo and ref foo patterns to give a good // error message when parsing mistakes like ref foo(a,b) fn parse_pat_ident(&self, @@ -2587,20 +2588,22 @@ pub impl Parser { }) } - // parse a statement. may include decl - fn parse_stmt(&self, first_item_attrs: ~[attribute]) -> @stmt { + // parse a statement. may include decl. + // precondition: any attributes are parsed already + fn parse_stmt(&self, item_attrs: ~[attribute]) -> @stmt { maybe_whole!(self, nt_stmt); fn check_expected_item(p: &Parser, current_attrs: &[attribute]) { // If we have attributes then we should have an item if !current_attrs.is_empty() { - p.fatal(~"expected item after attrs"); + p.span_err(*p.last_span, + ~"expected item after attributes"); } } let lo = self.span.lo; if self.is_keyword("let") { - check_expected_item(self, first_item_attrs); + check_expected_item(self, item_attrs); self.expect_keyword("let"); let decl = self.parse_let(); return @spanned(lo, decl.span.hi, stmt_decl(decl, self.get_id())); @@ -2613,7 +2616,7 @@ pub impl Parser { // to the macro clause of parse_item_or_view_item. This // could use some cleanup, it appears to me. - check_expected_item(self, first_item_attrs); + check_expected_item(self, item_attrs); // Potential trouble: if we allow macros with paths instead of // idents, we'd need to look ahead past the whole path here... @@ -2649,9 +2652,6 @@ pub impl Parser { } } else { - let item_attrs = vec::append(first_item_attrs, - self.parse_outer_attributes()); - match self.parse_item_or_view_item(/*bad*/ copy item_attrs, false) { iovi_item(i) => { @@ -2726,6 +2726,7 @@ pub impl Parser { let mut stmts = ~[]; let mut expr = None; + // wouldn't it be more uniform to parse view items only, here? let ParsedItemsAndViewItems { attrs_remaining: attrs_remaining, view_items: view_items, @@ -2740,23 +2741,29 @@ pub impl Parser { stmt_decl(decl, self.get_id()))); } - let mut initial_attrs = attrs_remaining; - - if *self.token == token::RBRACE && !vec::is_empty(initial_attrs) { - self.fatal(~"expected item"); - } + let mut attributes_box = attrs_remaining; - while *self.token != token::RBRACE { + while (*self.token != token::RBRACE) { + // parsing items even when they're not allowed lets us give + // better error messages and recover more gracefully. + attributes_box.push_all(self.parse_outer_attributes()); match *self.token { token::SEMI => { + if !vec::is_empty(attributes_box) { + self.span_err(*self.last_span,~"expected item after attributes"); + attributes_box = ~[]; + } self.bump(); // empty } + token::RBRACE => { + // fall through and out. + } _ => { - let stmt = self.parse_stmt(initial_attrs); - initial_attrs = ~[]; + let stmt = self.parse_stmt(attributes_box); + attributes_box = ~[]; match stmt.node { stmt_expr(e, stmt_id) => { - // Expression without semicolon + // expression without semicolon match *self.token { token::SEMI => { self.bump(); @@ -2772,7 +2779,7 @@ pub impl Parser { self.fatal( fmt!( "expected `;` or `}` after \ - expression but found `%s`", + expression but found `%s`", self.token_to_str(&t) ) ); @@ -2781,9 +2788,8 @@ pub impl Parser { } } } - stmt_mac(ref m, _) => { - // Statement macro; might be an expr + // statement macro; might be an expr match *self.token { token::SEMI => { self.bump(); @@ -2802,8 +2808,7 @@ pub impl Parser { _ => { stmts.push(stmt); } } } - - _ => { // All other kinds of statements: + _ => { // all other kinds of statements: stmts.push(stmt); if classify::stmt_ends_with_semi(stmt) { @@ -2814,6 +2819,11 @@ pub impl Parser { } } } + + if !vec::is_empty(attributes_box) { + self.span_err(*self.last_span,~"expected item after attributes"); + } + let hi = self.span.hi; self.bump(); let bloc = ast::blk_ { @@ -3518,7 +3528,7 @@ pub impl Parser { if first && attrs_remaining_len > 0u { // We parsed attributes for the first item but didn't find it - self.fatal(~"expected item"); + self.span_err(*self.last_span,~"expected item after attributes"); } ast::_mod { view_items: view_items, items: items } @@ -3723,11 +3733,15 @@ pub impl Parser { first_item_attrs: ~[attribute]) -> foreign_mod { let ParsedItemsAndViewItems { - attrs_remaining: _, + attrs_remaining: attrs_remaining, view_items: view_items, items: _, foreign_items: foreign_items } = self.parse_foreign_items(first_item_attrs, true); + if (! attrs_remaining.is_empty()) { + self.span_err(*self.last_span, + ~"expected item after attributes"); + } assert!(*self.token == token::RBRACE); ast::foreign_mod { sort: sort, diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 88fa5389089..b4bad5abbf9 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -411,8 +411,7 @@ pub impl ident_interner { } // return a fresh interner, preloaded with special identifiers. -// EFFECT: stores this interner in TLS -pub fn mk_fresh_ident_interner() -> @ident_interner { +fn mk_fresh_ident_interner() -> @ident_interner { // the indices here must correspond to the numbers in // special_idents. let init_vec = ~[ @@ -453,23 +452,27 @@ pub fn mk_fresh_ident_interner() -> @ident_interner { "Self", // 34 ]; - let rv = @ident_interner { + @ident_interner { interner: interner::StrInterner::prefill(init_vec) - }; - unsafe { - local_data::local_data_set(interner_key!(), @rv); } - rv } // if an interner exists in TLS, return it. Otherwise, prepare a // fresh one. -pub fn mk_ident_interner() -> @ident_interner { +pub fn get_ident_interner() -> @ident_interner { unsafe { - match local_data::local_data_get(interner_key!()) { + let key = + (cast::transmute::<(uint, uint), + &fn(v: @@::parse::token::ident_interner)>( + (-3 as uint, 0u))); + match local_data::local_data_get(key) { Some(interner) => *interner, None => { - mk_fresh_ident_interner() + let interner = mk_fresh_ident_interner(); + unsafe { + local_data::local_data_set(key, @interner); + } + interner } } } @@ -481,6 +484,12 @@ pub fn mk_fake_ident_interner() -> @ident_interner { @ident_interner { interner: interner::StrInterner::new() } } +// maps a string to its interned representation +pub fn intern(str : &str) -> ast::ident { + let interner = get_ident_interner(); + interner.intern(str) +} + /** * All the valid words that have meaning in the Rust language. * diff --git a/src/libsyntax/syntax.rc b/src/libsyntax/syntax.rc index 0d9b7ca429e..0650df57603 100644 --- a/src/libsyntax/syntax.rc +++ b/src/libsyntax/syntax.rc @@ -25,11 +25,10 @@ extern mod std; -// allow the interner_key macro -// to escape this module: -#[macro_escape] pub mod util { pub mod interner; + #[cfg(test)] + pub mod parser_testing; } pub mod syntax { diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index cca2ec89fd4..5d49c8cd75d 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -12,9 +12,6 @@ // allows bidirectional lookup; i.e. given a value, one can easily find the // type, and vice versa. -// allow the interner_key macro to escape this module: -#[macro_escape]; - use core::cmp::Equiv; use core::hashmap::HashMap; use syntax::parse::token::StringRef; @@ -78,6 +75,8 @@ pub impl<T:Eq + IterBytes + Hash + Const + Copy> Interner<T> { } } +// A StrInterner differs from Interner<String> in that it accepts +// borrowed pointers rather than @ ones, resulting in less allocation. pub struct StrInterner { priv map: @mut HashMap<@~str, uint>, priv vect: @mut ~[@~str], @@ -133,17 +132,6 @@ pub impl StrInterner { } } -/* Key for thread-local data for sneaking interner information to the -* encoder/decoder. It sounds like a hack because it is one. -* Bonus ultra-hack: functions as keys don't work across crates, -* so we have to use a unique number. See taskgroup_key! in task.rs -* for another case of this. */ -macro_rules! interner_key ( - () => (cast::transmute::<(uint, uint), - &fn(v: @@::parse::token::ident_interner)>( - (-3 as uint, 0u))) -) - #[cfg(test)] mod tests { use super::*; diff --git a/src/libsyntax/util/parser_testing.rs b/src/libsyntax/util/parser_testing.rs new file mode 100644 index 00000000000..1c2210c96b6 --- /dev/null +++ b/src/libsyntax/util/parser_testing.rs @@ -0,0 +1,60 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::option::{Option,None}; +use ast; +use parse::parser::Parser; +use parse::{new_parse_sess}; + +use syntax::parse::{ParseSess,string_to_filemap,filemap_to_tts}; +use syntax::parse::{new_parser_from_source_str}; + +// map a string to tts, using a made-up filename: return both the token_trees +// and the ParseSess +pub fn string_to_tts_and_sess (source_str : @~str) -> (~[ast::token_tree],@mut ParseSess) { + let ps = new_parse_sess(None); + (filemap_to_tts(ps,string_to_filemap(ps,source_str,~"bogofile")),ps) +} + +pub fn string_to_parser_and_sess(source_str: @~str) -> (Parser,@mut ParseSess) { + let ps = new_parse_sess(None); + (new_parser_from_source_str(ps,~[],~"bogofile",source_str),ps) +} + +// map string to parser (via tts) +pub fn string_to_parser(source_str: @~str) -> Parser { + let (p,_) = string_to_parser_and_sess(source_str); + p +} + +pub fn string_to_crate (source_str : @~str) -> @ast::crate { + string_to_parser(source_str).parse_crate_mod() +} + +// parse a string, return an expr +pub fn string_to_expr (source_str : @~str) -> @ast::expr { + string_to_parser(source_str).parse_expr() +} + +// parse a string, return an item +pub fn string_to_item (source_str : @~str) -> Option<@ast::item> { + string_to_parser(source_str).parse_item(~[]) +} + +// parse a string, return an item and the ParseSess +pub fn string_to_item_and_sess (source_str : @~str) -> (Option<@ast::item>,@mut ParseSess) { + let (p,ps) = string_to_parser_and_sess(source_str); + (p.parse_item(~[]),ps) +} + +pub fn string_to_stmt (source_str : @~str) -> @ast::stmt { + string_to_parser(source_str).parse_stmt(~[]) +} + diff --git a/src/test/compile-fail/attr-before-ext.rs b/src/test/compile-fail/attr-before-ext.rs index 2675b865e90..cf0f4a6240e 100644 --- a/src/test/compile-fail/attr-before-ext.rs +++ b/src/test/compile-fail/attr-before-ext.rs @@ -9,6 +9,6 @@ // except according to those terms. fn main() { - #[attr] - debug!("hi"); //~ ERROR expected item after attrs + #[attr] //~ ERROR expected item after attributes + debug!("hi"); } diff --git a/src/test/compile-fail/attr-before-let.rs b/src/test/compile-fail/attr-before-let.rs index 51ee903b1b1..acc9aa8a9a1 100644 --- a/src/test/compile-fail/attr-before-let.rs +++ b/src/test/compile-fail/attr-before-let.rs @@ -9,6 +9,6 @@ // except according to those terms. fn main() { - #[attr] - let _i = 0; //~ ERROR expected item + #[attr] //~ ERROR expected item + let _i = 0; } |
