proc_macro: Stay on the "use the cache" path more

Discovered in #50061 we're falling off the "happy path" of using a stringified token stream more often than we should. This was due to the fact that a user-written token like `0xf` is equality-different from the stringified token of `15` (despite being semantically equivalent). This patch updates the call to `eq_unspanned` with an even more awful solution, `probably_equal_for_proc_macro`, which ignores the value of each token and basically only compares the structure of the token stream, assuming that the AST doesn't change just one token at a time. While this is a step towards fixing #50061 there is still one regression from #49154 which needs to be fixed.
author: Alex Crichton <alex@alexcrichton.com> 2018-04-18 19:36:48 -0700
committer: Alex Crichton <alex@alexcrichton.com> 2018-04-18 19:36:48 -0700
commit: e9348738fca8c88dca6e343702eaa12ffc8df34c (patch)
tree: e778a2aabb658e2ef5754798bf0f4d4f63560422 /src/libsyntax
parent: ac3c2288f9f9d977acb46406ba60033d65165a7b (diff)
download: rust-e9348738fca8c88dca6e343702eaa12ffc8df34c.tar.gz
rust-e9348738fca8c88dca6e343702eaa12ffc8df34c.zip
2 files changed, 125 insertions, 9 deletions
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 2688a1adb56..44394384c7a 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -26,6 +26,7 @@ use tokenstream::{TokenStream, TokenTree};
 use tokenstream;
 
 use std::{cmp, fmt};
+use std::mem;
 use rustc_data_structures::sync::{Lrc, Lock};
 
 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
@@ -88,6 +89,12 @@ impl Lit {
             ByteStr(_) | ByteStrRaw(..) => "byte string"
         }
     }
+
+    // See comments in `interpolated_to_tokenstream` for why we care about
+    // *probably* equal here rather than actual equality
+    fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
+        mem::discriminant(self) == mem::discriminant(other)
+    }
 }
 
 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
@@ -530,14 +537,6 @@ impl Token {
         // stream they came from. Here we attempt to extract these
         // lossless token streams before we fall back to the
         // stringification.
-        //
-        // During early phases of the compiler, though, the AST could
-        // get modified directly (e.g. attributes added or removed) and
-        // the internal cache of tokens my not be invalidated or
-        // updated. Consequently if the "lossless" token stream
-        // disagrees with our actuall stringification (which has
-        // historically been much more battle-tested) then we go with
-        // the lossy stream anyway (losing span information).
         let mut tokens = None;
 
         match nt.0 {
@@ -569,13 +568,96 @@ impl Token {
             let source = pprust::token_to_string(self);
             parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
         });
+
+        // During early phases of the compiler the AST could get modified
+        // directly (e.g. attributes added or removed) and the internal cache
+        // of tokens my not be invalidated or updated. Consequently if the
+        // "lossless" token stream disagrees with our actual stringification
+        // (which has historically been much more battle-tested) then we go
+        // with the lossy stream anyway (losing span information).
+        //
+        // Note that the comparison isn't `==` here to avoid comparing spans,
+        // but it *also* is a "probable" equality which is a pretty weird
+        // definition. We mostly want to catch actual changes to the AST
+        // like a `#[cfg]` being processed or some weird `macro_rules!`
+        // expansion.
+        //
+        // What we *don't* want to catch is the fact that a user-defined
+        // literal like `0xf` is stringified as `15`, causing the cached token
+        // stream to not be literal `==` token-wise (ignoring spans) to the
+        // token stream we got from stringification.
+        //
+        // Instead the "probably equal" check here is "does each token
+        // recursively have the same discriminant?" We basically don't look at
+        // the token values here and assume that such fine grained modifications
+        // of token streams doesn't happen.
         if let Some(tokens) = tokens {
-            if tokens.eq_unspanned(&tokens_for_real) {
+            if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
                 return tokens
             }
         }
         return tokens_for_real
     }
+
+    // See comments in `interpolated_to_tokenstream` for why we care about
+    // *probably* equal here rather than actual equality
+    pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
+        if mem::discriminant(self) != mem::discriminant(other) {
+            return false
+        }
+        match (self, other) {
+            (&Eq, &Eq) |
+            (&Lt, &Lt) |
+            (&Le, &Le) |
+            (&EqEq, &EqEq) |
+            (&Ne, &Ne) |
+            (&Ge, &Ge) |
+            (&Gt, &Gt) |
+            (&AndAnd, &AndAnd) |
+            (&OrOr, &OrOr) |
+            (&Not, &Not) |
+            (&Tilde, &Tilde) |
+            (&At, &At) |
+            (&Dot, &Dot) |
+            (&DotDot, &DotDot) |
+            (&DotDotDot, &DotDotDot) |
+            (&DotDotEq, &DotDotEq) |
+            (&DotEq, &DotEq) |
+            (&Comma, &Comma) |
+            (&Semi, &Semi) |
+            (&Colon, &Colon) |
+            (&ModSep, &ModSep) |
+            (&RArrow, &RArrow) |
+            (&LArrow, &LArrow) |
+            (&FatArrow, &FatArrow) |
+            (&Pound, &Pound) |
+            (&Dollar, &Dollar) |
+            (&Question, &Question) |
+            (&Whitespace, &Whitespace) |
+            (&Comment, &Comment) |
+            (&Eof, &Eof) => true,
+
+            (&BinOp(a), &BinOp(b)) |
+            (&BinOpEq(a), &BinOpEq(b)) => a == b,
+
+            (&OpenDelim(a), &OpenDelim(b)) |
+            (&CloseDelim(a), &CloseDelim(b)) => a == b,
+
+            (&DocComment(a), &DocComment(b)) |
+            (&Shebang(a), &Shebang(b)) => a == b,
+
+            (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
+            (&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,
+
+            (&Literal(ref a, b), &Literal(ref c, d)) => {
+                b == d && a.probably_equal_for_proc_macro(c)
+            }
+
+            (&Interpolated(_), &Interpolated(_)) => false,
+
+            _ => panic!("forgot to add a token?"),
+        }
+    }
 }
 
 #[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs
index 6ac04b3cdf6..e2b5c4e1adf 100644
--- a/src/libsyntax/tokenstream.rs
+++ b/src/libsyntax/tokenstream.rs
@@ -124,6 +124,24 @@ impl TokenTree {
         }
     }
 
+    // See comments in `interpolated_to_tokenstream` for why we care about
+    // *probably* equal here rather than actual equality
+    //
+    // This is otherwise the same as `eq_unspanned`, only recursing with a
+    // different method.
+    pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
+        match (self, other) {
+            (&TokenTree::Token(_, ref tk), &TokenTree::Token(_, ref tk2)) => {
+                tk.probably_equal_for_proc_macro(tk2)
+            }
+            (&TokenTree::Delimited(_, ref dl), &TokenTree::Delimited(_, ref dl2)) => {
+                dl.delim == dl2.delim &&
+                dl.stream().probably_equal_for_proc_macro(&dl2.stream())
+            }
+            (_, _) => false,
+        }
+    }
+
     /// Retrieve the TokenTree's span.
     pub fn span(&self) -> Span {
         match *self {
@@ -250,6 +268,22 @@ impl TokenStream {
         t1.next().is_none() && t2.next().is_none()
     }
 
+    // See comments in `interpolated_to_tokenstream` for why we care about
+    // *probably* equal here rather than actual equality
+    //
+    // This is otherwise the same as `eq_unspanned`, only recursing with a
+    // different method.
+    pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
+        let mut t1 = self.trees();
+        let mut t2 = other.trees();
+        for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
+            if !t1.probably_equal_for_proc_macro(&t2) {
+                return false;
+            }
+        }
+        t1.next().is_none() && t2.next().is_none()
+    }
+
     /// Precondition: `self` consists of a single token tree.
     /// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
     pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
author	Alex Crichton <alex@alexcrichton.com>	2018-04-18 19:36:48 -0700
committer	Alex Crichton <alex@alexcrichton.com>	2018-04-18 19:36:48 -0700
commit	e9348738fca8c88dca6e343702eaa12ffc8df34c (patch)
tree	e778a2aabb658e2ef5754798bf0f4d4f63560422 /src/libsyntax
parent	ac3c2288f9f9d977acb46406ba60033d65165a7b (diff)
download	rust-e9348738fca8c88dca6e343702eaa12ffc8df34c.tar.gz rust-e9348738fca8c88dca6e343702eaa12ffc8df34c.zip