about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author	bors <bors@rust-lang.org>	2019-08-20 07:44:44 +0000
committer	bors <bors@rust-lang.org>	2019-08-20 07:44:44 +0000
commit	14890954ce17c44d944eda988c5a64bb4c5ec9eb (patch)
tree	31d7eff7318bb0fff0f541c6c8f6f57f9b2576ba /src
parent	7858dc237d70fc0c5a31eb528dfab1ad0baf6a27 (diff)
parent	914e1f456415eae0ae095dd39dc51c115c1ffb5a (diff)
download	rust-14890954ce17c44d944eda988c5a64bb4c5ec9eb.tar.gz
download	rust-14890954ce17c44d944eda988c5a64bb4c5ec9eb.zip
Auto merge of #63709 - matklad:decomposed-tokens, r=petrochenkov
Move token gluing to token stream parsing

work towards #63689, this moves token gluing from the lexer to the token tree layer. This is only a minimal step, but I like the negative diff here.

r? @petrochenkov
Diffstat (limited to 'src')
-rw-r--r--	src/librustc_lexer/src/lib.rs	176
-rw-r--r--	src/libsyntax/parse/lexer/mod.rs	25
-rw-r--r--	src/libsyntax/parse/lexer/tests.rs	32
-rw-r--r--	src/libsyntax/parse/lexer/tokentrees.rs	40
-rw-r--r--	src/libsyntax/parse/token.rs	2
-rw-r--r--	src/libsyntax/tokenstream.rs	2
6 files changed, 68 insertions, 209 deletions
diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
index afef307a0ed..41b47befaf1 100644
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -23,9 +23,6 @@ pub enum TokenKind {
     Lifetime { starts_with_number: bool },
     Semi,
     Comma,
-    DotDotDot,
-    DotDotEq,
-    DotDot,
     Dot,
     OpenParen,
     CloseParen,
@@ -37,41 +34,19 @@ pub enum TokenKind {
     Pound,
     Tilde,
     Question,
-    ColonColon,
     Colon,
     Dollar,
-    EqEq,
     Eq,
-    FatArrow,
-    Ne,
     Not,
-    Le,
-    LArrow,
     Lt,
-    ShlEq,
-    Shl,
-    Ge,
     Gt,
-    ShrEq,
-    Shr,
-    RArrow,
     Minus,
-    MinusEq,
     And,
-    AndAnd,
-    AndEq,
     Or,
-    OrOr,
-    OrEq,
-    PlusEq,
     Plus,
-    StarEq,
     Star,
-    SlashEq,
     Slash,
-    CaretEq,
     Caret,
-    PercentEq,
     Percent,
     Unknown,
 }
@@ -135,13 +110,7 @@ impl Cursor<'_> {
             '/' => match self.nth_char(0) {
                 '/' => self.line_comment(),
                 '*' => self.block_comment(),
-                _ => {
-                    if self.eat_assign() {
-                        SlashEq
-                    } else {
-                        Slash
-                    }
-                }
+                _ => Slash,
             },
             c if character_properties::is_whitespace(c) => self.whitespace(),
             'r' => match (self.nth_char(0), self.nth_char(1)) {
@@ -199,22 +168,7 @@ impl Cursor<'_> {
             }
             ';' => Semi,
             ',' => Comma,
-            '.' => {
-                if self.nth_char(0) == '.' {
-                    self.bump();
-                    if self.nth_char(0) == '.' {
-                        self.bump();
-                        DotDotDot
-                    } else if self.nth_char(0) == '=' {
-                        self.bump();
-                        DotDotEq
-                    } else {
-                        DotDot
-                    }
-                } else {
-                    Dot
-                }
-            }
+            '.' => Dot,
             '(' => OpenParen,
             ')' => CloseParen,
             '{' => OpenBrace,
@@ -225,112 +179,19 @@ impl Cursor<'_> {
             '#' => Pound,
             '~' => Tilde,
             '?' => Question,
-            ':' => {
-                if self.nth_char(0) == ':' {
-                    self.bump();
-                    ColonColon
-                } else {
-                    Colon
-                }
-            }
+            ':' => Colon,
             '$' => Dollar,
-            '=' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    EqEq
-                } else if self.nth_char(0) == '>' {
-                    self.bump();
-                    FatArrow
-                } else {
-                    Eq
-                }
-            }
-            '!' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    Ne
-                } else {
-                    Not
-                }
-            }
-            '<' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Le
-                }
-                '<' => {
-                    self.bump();
-                    if self.eat_assign() { ShlEq } else { Shl }
-                }
-                '-' => {
-                    self.bump();
-                    LArrow
-                }
-                _ => Lt,
-            },
-            '>' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Ge
-                }
-                '>' => {
-                    self.bump();
-                    if self.eat_assign() { ShrEq } else { Shr }
-                }
-                _ => Gt,
-            },
-            '-' => {
-                if self.nth_char(0) == '>' {
-                    self.bump();
-                    RArrow
-                } else {
-                    if self.eat_assign() { MinusEq } else { Minus }
-                }
-            }
-            '&' => {
-                if self.nth_char(0) == '&' {
-                    self.bump();
-                    AndAnd
-                } else {
-                    if self.eat_assign() { AndEq } else { And }
-                }
-            }
-            '|' => {
-                if self.nth_char(0) == '|' {
-                    self.bump();
-                    OrOr
-                } else {
-                    if self.eat_assign() { OrEq } else { Or }
-                }
-            }
-            '+' => {
-                if self.eat_assign() {
-                    PlusEq
-                } else {
-                    Plus
-                }
-            }
-            '*' => {
-                if self.eat_assign() {
-                    StarEq
-                } else {
-                    Star
-                }
-            }
-            '^' => {
-                if self.eat_assign() {
-                    CaretEq
-                } else {
-                    Caret
-                }
-            }
-            '%' => {
-                if self.eat_assign() {
-                    PercentEq
-                } else {
-                    Percent
-                }
-            }
+            '=' => Eq,
+            '!' => Not,
+            '<' => Lt,
+            '>' => Gt,
+            '-' => Minus,
+            '&' => And,
+            '|' => Or,
+            '+' => Plus,
+            '*' => Star,
+            '^' => Caret,
+            '%' => Percent,
             '\'' => self.lifetime_or_char(),
             '"' => {
                 let terminated = self.double_quoted_string();
@@ -643,15 +504,6 @@ impl Cursor<'_> {
             self.bump();
         }
     }
-
-    fn eat_assign(&mut self) -> bool {
-        if self.nth_char(0) == '=' {
-            self.bump();
-            true
-        } else {
-            false
-        }
-    }
 }
 
 pub mod character_properties {
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index bdf468a52bb..66add869359 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::TokenKind::Semi => token::Semi,
             rustc_lexer::TokenKind::Comma => token::Comma,
-            rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
-            rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
-            rustc_lexer::TokenKind::DotDot => token::DotDot,
             rustc_lexer::TokenKind::Dot => token::Dot,
             rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
             rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
@@ -287,42 +284,20 @@ impl<'a> StringReader<'a> {
             rustc_lexer::TokenKind::Pound => token::Pound,
             rustc_lexer::TokenKind::Tilde => token::Tilde,
             rustc_lexer::TokenKind::Question => token::Question,
-            rustc_lexer::TokenKind::ColonColon => token::ModSep,
             rustc_lexer::TokenKind::Colon => token::Colon,
             rustc_lexer::TokenKind::Dollar => token::Dollar,
-            rustc_lexer::TokenKind::EqEq => token::EqEq,
             rustc_lexer::TokenKind::Eq => token::Eq,
-            rustc_lexer::TokenKind::FatArrow => token::FatArrow,
-            rustc_lexer::TokenKind::Ne => token::Ne,
             rustc_lexer::TokenKind::Not => token::Not,
-            rustc_lexer::TokenKind::Le => token::Le,
-            rustc_lexer::TokenKind::LArrow => token::LArrow,
             rustc_lexer::TokenKind::Lt => token::Lt,
-            rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
-            rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
-            rustc_lexer::TokenKind::Ge => token::Ge,
             rustc_lexer::TokenKind::Gt => token::Gt,
-            rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
-            rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
-            rustc_lexer::TokenKind::RArrow => token::RArrow,
             rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
-            rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
             rustc_lexer::TokenKind::And => token::BinOp(token::And),
-            rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
-            rustc_lexer::TokenKind::AndAnd => token::AndAnd,
             rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
-            rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
-            rustc_lexer::TokenKind::OrOr => token::OrOr,
             rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
-            rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
             rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
-            rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
             rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
-            rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
             rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
-            rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
             rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-            rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),
 
             rustc_lexer::TokenKind::Unknown => {
                 let c = self.str_from(start).chars().next().unwrap();
diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs
index 94570140996..a915aa42fd1 100644
--- a/src/libsyntax/parse/lexer/tests.rs
+++ b/src/libsyntax/parse/lexer/tests.rs
@@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
 }
 
 #[test]
-fn doublecolonparsing() {
+fn doublecolon_parsing() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a b".to_string()),
-                        vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_2() {
+fn doublecolon_parsing_2() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a::b".to_string()),
-                        vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a::b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_3() {
+fn doublecolon_parsing_3() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
-                        vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a ::b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_4() {
+fn doublecolon_parsing_4() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
-                        vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a:: b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
+        );
     })
 }
 
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 37e67a2729e..e5ba7e45309 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -39,29 +39,29 @@ struct TokenTreesReader<'a> {
 impl<'a> TokenTreesReader<'a> {
     // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
     fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
-        let mut tts = Vec::new();
+        let mut buf = TokenStreamBuilder::default();
 
         self.real_token();
         while self.token != token::Eof {
-            tts.push(self.parse_token_tree()?);
+            buf.push(self.parse_token_tree()?);
         }
 
-        Ok(TokenStream::new(tts))
+        Ok(buf.into_token_stream())
     }
 
     // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
     fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
-        let mut tts = vec![];
+        let mut buf = TokenStreamBuilder::default();
         loop {
             if let token::CloseDelim(..) = self.token.kind {
-                return TokenStream::new(tts);
+                return buf.into_token_stream();
             }
 
             match self.parse_token_tree() {
-                Ok(tree) => tts.push(tree),
+                Ok(tree) => buf.push(tree),
                 Err(mut e) => {
                     e.emit();
-                    return TokenStream::new(tts);
+                    return buf.into_token_stream();
                 }
             }
         }
@@ -223,8 +223,32 @@ impl<'a> TokenTreesReader<'a> {
                 _ => {
                     self.token = token;
                     return;
-                },
+                }
+            }
+        }
+    }
+}
+
+#[derive(Default)]
+struct TokenStreamBuilder {
+    buf: Vec<TreeAndJoint>,
+}
+
+impl TokenStreamBuilder {
+    fn push(&mut self, (tree, joint): TreeAndJoint) {
+        if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() {
+            if let TokenTree::Token(token) = &tree {
+                if let Some(glued) = prev_token.glue(token) {
+                    self.buf.pop();
+                    self.buf.push((TokenTree::Token(glued), joint));
+                    return;
+                }
             }
         }
+        self.buf.push((tree, joint))
+    }
+
+    fn into_token_stream(self) -> TokenStream {
+        TokenStream::new(self.buf)
     }
 }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index be800b4de66..1865f925165 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -551,7 +551,7 @@ impl Token {
         }
     }
 
-    crate fn glue(self, joint: Token) -> Option<Token> {
+    crate fn glue(&self, joint: &Token) -> Option<Token> {
         let kind = match self.kind {
             Eq => match joint.kind {
                 Eq => EqEq,
diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs
index 6ff8898fe21..09a1b93c7bb 100644
--- a/src/libsyntax/tokenstream.rs
+++ b/src/libsyntax/tokenstream.rs
@@ -414,7 +414,7 @@ impl TokenStreamBuilder {
         let last_tree_if_joint = self.0.last().and_then(TokenStream::last_tree_if_joint);
         if let Some(TokenTree::Token(last_token)) = last_tree_if_joint {
             if let Some((TokenTree::Token(token), is_joint)) = stream.first_tree_and_joint() {
-                if let Some(glued_tok) = last_token.glue(token) {
+                if let Some(glued_tok) = last_token.glue(&token) {
                     let last_stream = self.0.pop().unwrap();
                     self.push_all_but_last_tree(&last_stream);
                     let glued_tt = TokenTree::Token(glued_tok);