about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFelix S. Klock II <pnkfelix@pnkfx.org>2014-05-12 19:11:46 +0200
committerFelix S. Klock II <pnkfelix@pnkfx.org>2014-05-15 11:09:18 +0200
commitb9ed9ed5ad460ccb1fd1a64af38bfcb72e2eada7 (patch)
treead89d01a3a58fc9cb093eaf492ee8a77da97d121
parentd6cf0dfbab35f40abaef345024701ff8112ed2d8 (diff)
downloadrust-b9ed9ed5ad460ccb1fd1a64af38bfcb72e2eada7.tar.gz
rust-b9ed9ed5ad460ccb1fd1a64af38bfcb72e2eada7.zip
Teach SVH computation to ignore more implementation artifacts.
In particular, this version of strict version hash (SVH) works much
like the deriving(Hash)-based implementation did, except that uses a
content-based hash that filters rustc implementation artifacts and
surface syntax artifacts.

Fix #14132.
-rw-r--r--src/librustc/back/svh.rs423
1 files changed, 407 insertions, 16 deletions
diff --git a/src/librustc/back/svh.rs b/src/librustc/back/svh.rs
index 8a2f7ff1976..489722aa13f 100644
--- a/src/librustc/back/svh.rs
+++ b/src/librustc/back/svh.rs
@@ -51,6 +51,7 @@ use std::hash::Hash;
 use std::hash::sip::SipState;
 use std::iter::range_step;
 use syntax::ast;
+use syntax::visit;
 
 #[deriving(Clone, Eq)]
 pub struct Svh {
@@ -68,25 +69,28 @@ impl Svh {
     }
 
     pub fn calculate(krate: &ast::Crate) -> Svh {
-        // FIXME: see above for why this is wrong, it shouldn't just hash the
-        //        crate.  Fixing this would require more in-depth analysis in
-        //        this function about what portions of the crate are reachable
-        //        in tandem with bug fixes throughout the rest of the compiler.
-        //
-        //        Note that for now we actually exclude some top-level things
-        //        from the crate like the CrateConfig/span. The CrateConfig
-        //        contains command-line `--cfg` flags, so this means that the
-        //        stage1/stage2 AST for libstd and such is different hash-wise
-        //        when it's actually the exact same representation-wise.
-        //
-        //        As a first stab at only hashing the relevant parts of the
-        //        AST, this only hashes the module/attrs, not the CrateConfig
-        //        field.
-        //
+        // FIXME (#14132): This is better than it used to be, but it still not
+        // ideal. We now attempt to hash only the relevant portions of the
+        // Crate AST as well as the top-level crate attributes. (However,
+        // the hashing of the crate attributes should be double-checked
+        // to ensure it is not incorporating implementation artifacts into
+        // the hash that are not otherwise visible.)
+
         // FIXME: this should use SHA1, not SipHash. SipHash is not built to
         //        avoid collisions.
         let mut state = SipState::new();
-        krate.module.hash(&mut state);
+
+        {
+            let mut visit = svh_visitor::make(&mut state);
+            visit::walk_crate(&mut visit, krate, ());
+        }
+
+        // FIXME (#14132): This hash is still sensitive to e.g. the
+        // spans of the crate Attributes and their underlying
+        // MetaItems; we should make ContentHashable impl for those
+        // types and then use hash_content.  But, since all crate
+        // attributes should appear near beginning of the file, it is
+        // not such a big deal to be sensitive to their spans for now.
         krate.attrs.hash(&mut state);
 
         let hash = state.result();
@@ -110,3 +114,390 @@ impl fmt::Show for Svh {
         f.pad(self.as_str())
     }
 }
+
+// FIXME (#14132): Even this SVH computation still has implementation
+// artifacts: namely, the order of item declaration will affect the
+// hash computation, but for many kinds of items the order of
+// declaration should be irrelevant to the ABI.
+
+mod svh_visitor {
+    use syntax::ast;
+    use syntax::ast::*;
+    use syntax::codemap::Span;
+    use syntax::parse::token;
+    use syntax::print::pprust;
+    use syntax::visit;
+    use syntax::visit::{Visitor, FnKind};
+
+    use std::hash::Hash;
+    use std::hash::sip::SipState;
+
+    pub struct StrictVersionHashVisitor<'a> {
+        pub st: &'a mut SipState,
+    }
+
+    pub fn make<'a>(st: &'a mut SipState) -> StrictVersionHashVisitor<'a> {
+        StrictVersionHashVisitor { st: st }
+    }
+
+    // To off-load the bulk of the hash-computation on deriving(Hash),
+    // we define a set of enums corresponding to the content that our
+    // crate visitor will encounter as it traverses the ast.
+    //
+    // The important invariant is that all of the Saw*Component enums
+    // do not carry any Spans, Names, or Idents.
+    //
+    // Not carrying any Names/Idents is the important fix for problem
+    // noted on PR #13948: using the ident.name as the basis for a
+    // hash leads to unstable SVH, because ident.name is just an index
+    // into intern table (i.e. essentially a random address), not
+    // computed from the name content.
+    //
+    // With the below enums, the SVH computation is not sensitive to
+    // artifacts of how rustc was invoked nor of how the source code
+    // was laid out.  (Or at least it is *less* sensitive.)
+
+    // This enum represents the different potential bits of code the
+    // visitor could encounter that could affect the ABI for the crate,
+    // and assigns each a distinct tag to feed into the hash computation.
+    #[deriving(Hash)]
+    enum SawAbiComponent<'a> {
+
+        // FIXME (#14132): should we include (some function of)
+        // ident.ctxt as well?
+        SawIdent(token::InternedString),
+        SawStructDef(token::InternedString),
+
+        SawLifetimeRef(token::InternedString),
+        SawLifetimeDecl(token::InternedString),
+
+        SawMod,
+        SawViewItem,
+        SawForeignItem,
+        SawItem,
+        SawDecl,
+        SawTy,
+        SawGenerics,
+        SawFn,
+        SawTyMethod,
+        SawTraitMethod,
+        SawStructField,
+        SawVariant,
+        SawExplicitSelf,
+        SawPath,
+        SawOptLifetimeRef,
+        SawBlock,
+        SawPat,
+        SawLocal,
+        SawArm,
+        SawExpr(SawExprComponent<'a>),
+        SawStmt(SawStmtComponent),
+    }
+
+    /// SawExprComponent carries all of the information that we want
+    /// to include in the hash that *won't* be covered by the
+    /// subsequent recursive traversal of the expression's
+    /// substructure by the visitor.
+    ///
+    /// We know every Expr_ variant is covered by a variant because
+    /// `fn saw_expr` maps each to some case below.  Ensuring that
+    /// each variant carries an appropriate payload has to be verified
+    /// by hand.
+    ///
+    /// (However, getting that *exactly* right is not so important
+    /// because the SVH is just a developer convenience; there is no
+    /// guarantee of collision-freedom, hash collisions are just
+    /// (hopefully) unlikely.)
+    #[deriving(Hash)]
+    pub enum SawExprComponent<'a> {
+
+        SawExprLoop(Option<token::InternedString>),
+        SawExprField(token::InternedString),
+        SawExprBreak(Option<token::InternedString>),
+        SawExprAgain(Option<token::InternedString>),
+
+        SawExprVstore,
+        SawExprBox,
+        SawExprVec,
+        SawExprCall,
+        SawExprMethodCall,
+        SawExprTup,
+        SawExprBinary(ast::BinOp),
+        SawExprUnary(ast::UnOp),
+        SawExprLit(ast::Lit_),
+        SawExprCast,
+        SawExprIf,
+        SawExprWhile,
+        SawExprMatch,
+        SawExprFnBlock,
+        SawExprProc,
+        SawExprBlock,
+        SawExprAssign,
+        SawExprAssignOp(ast::BinOp),
+        SawExprIndex,
+        SawExprPath,
+        SawExprAddrOf(ast::Mutability),
+        SawExprRet,
+        SawExprInlineAsm(&'a ast::InlineAsm),
+        SawExprStruct,
+        SawExprRepeat,
+        SawExprParen,
+    }
+
+    fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
+        match *node {
+            ExprVstore(..)           => SawExprVstore,
+            ExprBox(..)              => SawExprBox,
+            ExprVec(..)              => SawExprVec,
+            ExprCall(..)             => SawExprCall,
+            ExprMethodCall(..)       => SawExprMethodCall,
+            ExprTup(..)              => SawExprTup,
+            ExprBinary(op, _, _)     => SawExprBinary(op),
+            ExprUnary(op, _)         => SawExprUnary(op),
+            ExprLit(lit)             => SawExprLit(lit.node.clone()),
+            ExprCast(..)             => SawExprCast,
+            ExprIf(..)               => SawExprIf,
+            ExprWhile(..)            => SawExprWhile,
+            ExprLoop(_, id)          => SawExprLoop(id.map(content)),
+            ExprMatch(..)            => SawExprMatch,
+            ExprFnBlock(..)          => SawExprFnBlock,
+            ExprProc(..)             => SawExprProc,
+            ExprBlock(..)            => SawExprBlock,
+            ExprAssign(..)           => SawExprAssign,
+            ExprAssignOp(op, _, _)   => SawExprAssignOp(op),
+            ExprField(_, id, _)      => SawExprField(content(id)),
+            ExprIndex(..)            => SawExprIndex,
+            ExprPath(..)             => SawExprPath,
+            ExprAddrOf(m, _)         => SawExprAddrOf(m),
+            ExprBreak(id)            => SawExprBreak(id.map(content)),
+            ExprAgain(id)            => SawExprAgain(id.map(content)),
+            ExprRet(..)              => SawExprRet,
+            ExprInlineAsm(ref asm)   => SawExprInlineAsm(asm),
+            ExprStruct(..)           => SawExprStruct,
+            ExprRepeat(..)           => SawExprRepeat,
+            ExprParen(..)            => SawExprParen,
+
+            // just syntactic artifacts, expanded away by time of SVH.
+            ExprForLoop(..)          => unreachable!(),
+            ExprMac(..)              => unreachable!(),
+        }
+    }
+
+    /// SawStmtComponent is analogous to SawExprComponent, but for statements.
+    #[deriving(Hash)]
+    pub enum SawStmtComponent {
+        SawStmtDecl,
+        SawStmtExpr,
+        SawStmtSemi,
+    }
+
+    fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
+        match *node {
+            StmtDecl(..) => SawStmtDecl,
+            StmtExpr(..) => SawStmtExpr,
+            StmtSemi(..) => SawStmtSemi,
+            StmtMac(..)  => unreachable!(),
+        }
+    }
+
+    // Ad-hoc overloading between Ident and Name to their intern table lookups.
+    trait InternKey { fn get_content(self) -> token::InternedString; }
+    impl InternKey for Ident {
+        fn get_content(self) -> token::InternedString { token::get_ident(self) }
+    }
+    impl InternKey for Name {
+        fn get_content(self) -> token::InternedString { token::get_name(self) }
+    }
+    fn content<K:InternKey>(k: K) -> token::InternedString { k.get_content() }
+
+    // local short-hand eases writing signatures of syntax::visit mod.
+    type E = ();
+
+    impl<'a> Visitor<E> for StrictVersionHashVisitor<'a> {
+
+        fn visit_mac(&mut self, macro: &Mac, e: E) {
+            // macro invocations, namely macro_rules definitions,
+            // *can* appear as items, even in the expanded crate AST.
+
+            if macro_name(macro).get() == "macro_rules" {
+                // Pretty-printing definition to a string strips out
+                // surface artifacts (currently), such as the span
+                // information, yielding a content-based hash.
+
+                // FIXME (#14132): building temporary string is
+                // expensive; a direct content-based hash on token
+                // trees might be faster. Implementing this is far
+                // easier in short term.
+                let macro_defn_as_string =
+                    pprust::to_str(|pp_state| pp_state.print_mac(macro));
+                macro_defn_as_string.hash(self.st);
+            } else {
+                // It is not possible to observe any kind of macro
+                // invocation at this stage except `macro_rules!`.
+                fail!("reached macro somehow: {}",
+                      pprust::to_str(|pp_state| pp_state.print_mac(macro)));
+            }
+
+            visit::walk_mac(self, macro, e);
+
+            fn macro_name(macro: &Mac) -> token::InternedString {
+                match &macro.node {
+                    &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => {
+                        let s = path.segments.as_slice();
+                        assert_eq!(s.len(), 1);
+                        content(s[0].identifier)
+                    }
+                }
+            }
+        }
+
+        fn visit_struct_def(&mut self, s: &StructDef, ident: Ident,
+                            g: &Generics, _: NodeId, e: E) {
+            SawStructDef(content(ident)).hash(self.st);
+            visit::walk_generics(self, g, e.clone());
+            visit::walk_struct_def(self, s, e)
+        }
+
+        fn visit_variant(&mut self, v: &Variant, g: &Generics, e: E) {
+            SawVariant.hash(self.st);
+            // walk_variant does not call walk_generics, so do it here.
+            visit::walk_generics(self, g, e.clone());
+            visit::walk_variant(self, v, g, e)
+        }
+
+        fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option<Lifetime>, env: E) {
+            SawOptLifetimeRef.hash(self.st);
+            // (This is a strange method in the visitor trait, in that
+            // it does not expose a walk function to do the subroutine
+            // calls.)
+            match *l {
+                Some(ref l) => self.visit_lifetime_ref(l, env),
+                None => ()
+            }
+        }
+
+        // All of the remaining methods just record (in the hash
+        // SipState) that the visitor saw that particular variant
+        // (with its payload), and continue walking as the default
+        // visitor would.
+        //
+        // Some of the implementations have some notes as to how one
+        // might try to make their SVH computation less discerning
+        // (e.g. by incorporating reachability analysis).  But
+        // currently all of their implementations are uniform and
+        // uninteresting.
+        //
+        // (If you edit a method such that it deviates from the
+        // pattern, please move that method up above this comment.)
+
+        fn visit_ident(&mut self, _: Span, ident: Ident, _: E) {
+            SawIdent(content(ident)).hash(self.st);
+        }
+
+        fn visit_lifetime_ref(&mut self, l: &Lifetime, _: E) {
+            SawLifetimeRef(content(l.name)).hash(self.st);
+        }
+
+        fn visit_lifetime_decl(&mut self, l: &Lifetime, _: E) {
+            SawLifetimeDecl(content(l.name)).hash(self.st);
+        }
+
+        // We do recursively walk the bodies of functions/methods
+        // (rather than omitting their bodies from the hash) since
+        // monomorphization and cross-crate inlining generally implies
+        // that a change to a crate body will require downstream
+        // crates to be recompiled.
+        fn visit_expr(&mut self, ex: &Expr, e: E) {
+            SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex, e)
+        }
+
+        fn visit_stmt(&mut self, s: &Stmt, e: E) {
+            SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s, e)
+        }
+
+        fn visit_view_item(&mut self, i: &ViewItem, e: E) {
+            // Two kinds of view items can affect the ABI for a crate:
+            // exported `pub use` view items (since that may expose
+            // items that downstream crates can call), and `use
+            // foo::Trait`, since changing that may affect method
+            // resolution.
+            //
+            // The simplest approach to handling both of the above is
+            // just to adopt the same simple-minded (fine-grained)
+            // hash that I am deploying elsewhere here.
+            SawViewItem.hash(self.st); visit::walk_view_item(self, i, e)
+        }
+
+        fn visit_foreign_item(&mut self, i: &ForeignItem, e: E) {
+            // FIXME (#14132) ideally we would incorporate privacy (or
+            // perhaps reachability) somewhere here, so foreign items
+            // that do not leak into downstream crates would not be
+            // part of the ABI.
+            SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i, e)
+        }
+
+        fn visit_item(&mut self, i: &Item, e: E) {
+            // FIXME (#14132) ideally would incorporate reachability
+            // analysis somewhere here, so items that never leak into
+            // downstream crates (e.g. via monomorphisation or
+            // inlining) would not be part of the ABI.
+            SawItem.hash(self.st); visit::walk_item(self, i, e)
+        }
+
+        fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId, e: E) {
+            SawMod.hash(self.st); visit::walk_mod(self, m, e)
+        }
+
+        fn visit_decl(&mut self, d: &Decl, e: E) {
+            SawDecl.hash(self.st); visit::walk_decl(self, d, e)
+        }
+
+        fn visit_ty(&mut self, t: &Ty, e: E) {
+            SawTy.hash(self.st); visit::walk_ty(self, t, e)
+        }
+
+        fn visit_generics(&mut self, g: &Generics, e: E) {
+            SawGenerics.hash(self.st); visit::walk_generics(self, g, e)
+        }
+
+        fn visit_fn(&mut self, fk: &FnKind, fd: &FnDecl, b: &Block, s: Span, _: NodeId, e: E) {
+            SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s, e)
+        }
+
+        fn visit_ty_method(&mut self, t: &TypeMethod, e: E) {
+            SawTyMethod.hash(self.st); visit::walk_ty_method(self, t, e)
+        }
+
+        fn visit_trait_method(&mut self, t: &TraitMethod, e: E) {
+            SawTraitMethod.hash(self.st); visit::walk_trait_method(self, t, e)
+        }
+
+        fn visit_struct_field(&mut self, s: &StructField, e: E) {
+            SawStructField.hash(self.st); visit::walk_struct_field(self, s, e)
+        }
+
+        fn visit_explicit_self(&mut self, es: &ExplicitSelf, e: E) {
+            SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es, e)
+        }
+
+        fn visit_path(&mut self, path: &Path, _: ast::NodeId, e: E) {
+            SawPath.hash(self.st); visit::walk_path(self, path, e)
+        }
+
+        fn visit_block(&mut self, b: &Block, e: E) {
+            SawBlock.hash(self.st); visit::walk_block(self, b, e)
+        }
+
+        fn visit_pat(&mut self, p: &Pat, e: E) {
+            SawPat.hash(self.st); visit::walk_pat(self, p, e)
+        }
+
+        fn visit_local(&mut self, l: &Local, e: E) {
+            SawLocal.hash(self.st); visit::walk_local(self, l, e)
+        }
+
+        fn visit_arm(&mut self, a: &Arm, e: E) {
+            SawArm.hash(self.st); visit::walk_arm(self, a, e)
+        }
+    }
+}