about summary refs log tree commit diff
diff options
context:
space:
mode:
authorLukas Wirth <lukastw97@gmail.com>2023-02-03 17:18:48 +0100
committerLukas Wirth <lukastw97@gmail.com>2023-02-03 17:18:48 +0100
commit6fa6efe90fc8a79395cacb5c71315f0e2b32e623 (patch)
treeeae05eee4e4201e3034869dbcab01f394f07b9b3
parentdab685dd87ba99f6c0f005f2ce7b0a3c10dada22 (diff)
downloadrust-6fa6efe90fc8a79395cacb5c71315f0e2b32e623.tar.gz
rust-6fa6efe90fc8a79395cacb5c71315f0e2b32e623.zip
fix: Fix parsing of nested tuple field accesses in a cursed way
-rw-r--r--crates/parser/src/event.rs10
-rw-r--r--crates/parser/src/grammar/expressions.rs98
-rw-r--r--crates/parser/src/lib.rs2
-rw-r--r--crates/parser/src/output.rs12
-rw-r--r--crates/parser/src/parser.rs32
-rw-r--r--crates/parser/src/shortcuts.rs54
-rw-r--r--crates/parser/src/tests/prefix_entries.rs4
-rw-r--r--crates/parser/test_data/parser/inline/ok/0011_field_expr.rast33
-rw-r--r--crates/parser/test_data/parser/inline/ok/0011_field_expr.rs2
-rw-r--r--crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast43
-rw-r--r--crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs2
-rw-r--r--crates/parser/test_data/parser/inline/ok/0137_await_expr.rast35
-rw-r--r--crates/parser/test_data/parser/inline/ok/0137_await_expr.rs2
13 files changed, 294 insertions, 35 deletions
diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs
index b0e70e79430..fb2616cf013 100644
--- a/crates/parser/src/event.rs
+++ b/crates/parser/src/event.rs
@@ -72,9 +72,12 @@ pub(crate) enum Event {
     /// `n_raw_tokens = 2` is used to produced a single `>>`.
     Token {
         kind: SyntaxKind,
+        // Consider custom enum here?
         n_raw_tokens: u8,
     },
-
+    FloatSplitHack {
+        has_pseudo_dot: bool,
+    },
     Error {
         msg: String,
     },
@@ -125,6 +128,11 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
             Event::Token { kind, n_raw_tokens } => {
                 res.token(kind, n_raw_tokens);
             }
+            Event::FloatSplitHack { has_pseudo_dot } => {
+                res.float_split_hack(has_pseudo_dot);
+                let ev = mem::replace(&mut events[i + 1], Event::tombstone());
+                assert!(matches!(ev, Event::Finish), "{ev:?}");
+            }
             Event::Error { msg } => res.error(msg),
         }
     }
diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs
index 8932330b825..7516ac3c4bd 100644
--- a/crates/parser/src/grammar/expressions.rs
+++ b/crates/parser/src/grammar/expressions.rs
@@ -379,7 +379,7 @@ fn postfix_expr(
             // }
             T!['('] if allow_calls => call_expr(p, lhs),
             T!['['] if allow_calls => index_expr(p, lhs),
-            T![.] => match postfix_dot_expr(p, lhs) {
+            T![.] => match postfix_dot_expr::<false>(p, lhs) {
                 Ok(it) => it,
                 Err(it) => {
                     lhs = it;
@@ -393,35 +393,44 @@ fn postfix_expr(
         block_like = BlockLike::NotBlock;
     }
     return (lhs, block_like);
+}
 
-    fn postfix_dot_expr(
-        p: &mut Parser<'_>,
-        lhs: CompletedMarker,
-    ) -> Result<CompletedMarker, CompletedMarker> {
+fn postfix_dot_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> Result<CompletedMarker, CompletedMarker> {
+    if !FLOAT_RECOVERY {
         assert!(p.at(T![.]));
-        if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) {
-            return Ok(method_call_expr(p, lhs));
-        }
+    }
+    let nth1 = if FLOAT_RECOVERY { 0 } else { 1 };
+    let nth2 = if FLOAT_RECOVERY { 1 } else { 2 };
 
-        // test await_expr
-        // fn foo() {
-        //     x.await;
-        //     x.0.await;
-        //     x.0().await?.hello();
-        // }
-        if p.nth(1) == T![await] {
-            let m = lhs.precede(p);
-            p.bump(T![.]);
-            p.bump(T![await]);
-            return Ok(m.complete(p, AWAIT_EXPR));
-        }
+    if p.nth(nth1) == IDENT && (p.nth(nth2) == T!['('] || p.nth_at(nth2, T![::])) {
+        return Ok(method_call_expr::<FLOAT_RECOVERY>(p, lhs));
+    }
 
-        if p.at(T![..=]) || p.at(T![..]) {
-            return Err(lhs);
+    // test await_expr
+    // fn foo() {
+    //     x.await;
+    //     x.0.await;
+    //     x.0().await?.hello();
+    //     x.0.0.await;
+    //     x.0. await;
+    // }
+    if p.nth(nth1) == T![await] {
+        let m = lhs.precede(p);
+        if !FLOAT_RECOVERY {
+            p.bump(T![.]);
         }
+        p.bump(T![await]);
+        return Ok(m.complete(p, AWAIT_EXPR));
+    }
 
-        Ok(field_expr(p, lhs))
+    if p.at(T![..=]) || p.at(T![..]) {
+        return Err(lhs);
     }
+
+    field_expr::<FLOAT_RECOVERY>(p, lhs)
 }
 
 // test call_expr
@@ -455,11 +464,22 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
 // fn foo() {
 //     x.foo();
 //     y.bar::<T>(1, 2,);
+//     x.0.0.call();
+//     x.0. call();
 // }
-fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
-    assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
+fn method_call_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> CompletedMarker {
+    if FLOAT_RECOVERY {
+        assert!(p.nth(0) == IDENT && (p.nth(1) == T!['('] || p.nth_at(1, T![::])));
+    } else {
+        assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
+    }
     let m = lhs.precede(p);
-    p.bump_any();
+    if !FLOAT_RECOVERY {
+        p.bump(T![.]);
+    }
     name_ref(p);
     generic_args::opt_generic_arg_list(p, true);
     if p.at(T!['(']) {
@@ -472,21 +492,35 @@ fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker
 // fn foo() {
 //     x.foo;
 //     x.0.bar;
+//     x.0.1;
+//     x.0. bar;
 //     x.0();
 // }
-fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
-    assert!(p.at(T![.]));
+fn field_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> Result<CompletedMarker, CompletedMarker> {
+    if !FLOAT_RECOVERY {
+        assert!(p.at(T![.]));
+    }
     let m = lhs.precede(p);
-    p.bump(T![.]);
+    if !FLOAT_RECOVERY {
+        p.bump(T![.]);
+    }
     if p.at(IDENT) || p.at(INT_NUMBER) {
         name_ref_or_index(p);
     } else if p.at(FLOAT_NUMBER) {
-        // FIXME: How to recover and instead parse INT + T![.]?
-        p.bump_any();
+        return match p.split_float(m) {
+            (true, m) => {
+                let lhs = m.complete(p, FIELD_EXPR);
+                postfix_dot_expr::<true>(p, lhs)
+            }
+            (false, m) => Ok(m.complete(p, FIELD_EXPR)),
+        };
     } else {
         p.error("expected field name or number");
     }
-    m.complete(p, FIELD_EXPR)
+    Ok(m.complete(p, FIELD_EXPR))
 }
 
 // test try_expr
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 87be4792773..f20d32d6cf8 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -102,7 +102,7 @@ impl TopEntryPoint {
                 match step {
                     Step::Enter { .. } => depth += 1,
                     Step::Exit => depth -= 1,
-                    Step::Token { .. } | Step::Error { .. } => (),
+                    Step::FloatSplit { .. } | Step::Token { .. } | Step::Error { .. } => (),
                 }
             }
             assert!(!first, "no tree at all");
diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs
index 3de6c0aba86..9587c8cb1ba 100644
--- a/crates/parser/src/output.rs
+++ b/crates/parser/src/output.rs
@@ -25,6 +25,7 @@ pub struct Output {
 #[derive(Debug)]
 pub enum Step<'a> {
     Token { kind: SyntaxKind, n_input_tokens: u8 },
+    FloatSplit { has_pseudo_dot: bool },
     Enter { kind: SyntaxKind },
     Exit,
     Error { msg: &'a str },
@@ -44,6 +45,7 @@ impl Output {
     const TOKEN_EVENT: u8 = 0;
     const ENTER_EVENT: u8 = 1;
     const EXIT_EVENT: u8 = 2;
+    const SPLIT_EVENT: u8 = 3;
 
     pub fn iter(&self) -> impl Iterator<Item = Step<'_>> {
         self.event.iter().map(|&event| {
@@ -67,6 +69,9 @@ impl Output {
                     Step::Enter { kind }
                 }
                 Self::EXIT_EVENT => Step::Exit,
+                Self::SPLIT_EVENT => {
+                    Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 }
+                }
                 _ => unreachable!(),
             }
         })
@@ -79,6 +84,13 @@ impl Output {
         self.event.push(e)
     }
 
+    pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) {
+        let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT
+            | ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT)
+            | Self::EVENT_MASK;
+        self.event.push(e);
+    }
+
     pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
         let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
             | ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 48aecb35be1..0f4fa602291 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -181,6 +181,38 @@ impl<'t> Parser<'t> {
         self.do_bump(kind, 1);
     }
 
+    /// Advances the parser past one `FLOAT_NUMBER` token, pushing a `FloatSplitHack` event for it.
+    pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) {
+        assert!(self.at(SyntaxKind::FLOAT_NUMBER));
+        // we have parsed `<something>.`
+        // `<something>`.0.1
+        // here we need to insert an extra event
+        //
+        // `<something>`. 0. 1;
+        // here we need to change the follow up parse, the return value will cause us to emulate a dot
+        // the actual splitting happens later
+        let has_pseudo_dot = !self.inp.is_joint(self.pos);
+        let marker = if !has_pseudo_dot {
+            let new_pos = self.start();
+            let idx = marker.pos as usize;
+            match &mut self.events[idx] {
+                Event::Start { forward_parent, kind } => {
+                    *kind = SyntaxKind::FIELD_EXPR;
+                    *forward_parent = Some(new_pos.pos - marker.pos);
+                }
+                _ => unreachable!(),
+            }
+            // NOTE: This brings the start / finish pairs out of balance!
+            std::mem::forget(marker);
+            new_pos
+        } else {
+            marker
+        };
+        self.pos += 1 as usize;
+        self.push_event(Event::FloatSplitHack { has_pseudo_dot });
+        (has_pseudo_dot, marker)
+    }
+
     /// Advances the parser by one token, remapping its kind.
     /// This is useful to create contextual keywords from
     /// identifiers. For example, the lexer creates a `union`
diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs
index 2be4050d135..18a6f838fac 100644
--- a/crates/parser/src/shortcuts.rs
+++ b/crates/parser/src/shortcuts.rs
@@ -44,7 +44,17 @@ impl<'a> LexedStr<'a> {
                     }
                     res.push(kind);
                 }
-                was_joint = true;
+                if kind == SyntaxKind::FLOAT_NUMBER {
+                    // we set jointness for floating point numbers as a hack to inform the
+                    // parser about whether we have a `0.` or `0.1` style float
+                    if self.text(i).split_once('.').map_or(false, |(_, it)| it.is_empty()) {
+                        was_joint = false;
+                    } else {
+                        was_joint = true;
+                    }
+                } else {
+                    was_joint = true;
+                }
             }
         }
         res
@@ -63,6 +73,7 @@ impl<'a> LexedStr<'a> {
                 Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                     builder.token(kind, n_raw_tokens)
                 }
+                Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot),
                 Step::Enter { kind } => builder.enter(kind),
                 Step::Exit => builder.exit(),
                 Step::Error { msg } => {
@@ -109,6 +120,16 @@ impl Builder<'_, '_> {
         self.do_token(kind, n_tokens as usize);
     }
 
+    fn float_split(&mut self, has_pseudo_dot: bool) {
+        match mem::replace(&mut self.state, State::Normal) {
+            State::PendingEnter => unreachable!(),
+            State::PendingExit => (self.sink)(StrStep::Exit),
+            State::Normal => (),
+        }
+        self.eat_trivias();
+        self.do_float_split(has_pseudo_dot);
+    }
+
     fn enter(&mut self, kind: SyntaxKind) {
         match mem::replace(&mut self.state, State::Normal) {
             State::PendingEnter => {
@@ -164,6 +185,37 @@ impl Builder<'_, '_> {
         self.pos += n_tokens;
         (self.sink)(StrStep::Token { kind, text });
     }
+
+    fn do_float_split(&mut self, has_pseudo_dot: bool) {
+        let text = &self.lexed.range_text(self.pos..self.pos + 1);
+        self.pos += 1;
+        match text.split_once('.') {
+            Some((left, right)) => {
+                assert!(!left.is_empty());
+                (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
+                (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left });
+                (self.sink)(StrStep::Exit);
+
+                // here we move the exit up, the original exit has been deleted in `event::process`
+                (self.sink)(StrStep::Exit);
+
+                (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." });
+
+                if has_pseudo_dot {
+                    assert!(right.is_empty());
+                    self.state = State::Normal;
+                } else {
+                    (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
+                    (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right });
+                    (self.sink)(StrStep::Exit);
+
+                    // the parser creates an unbalanced start node, we are required to close it here
+                    self.state = State::PendingExit;
+                }
+            }
+            None => unreachable!(),
+        }
+    }
 }
 
 fn n_attached_trivias<'a>(
diff --git a/crates/parser/src/tests/prefix_entries.rs b/crates/parser/src/tests/prefix_entries.rs
index e626b4f27e0..40f92e58804 100644
--- a/crates/parser/src/tests/prefix_entries.rs
+++ b/crates/parser/src/tests/prefix_entries.rs
@@ -51,6 +51,9 @@ fn expr() {
     check(PrefixEntryPoint::Expr, "-1", "-1");
     check(PrefixEntryPoint::Expr, "fn foo() {}", "fn");
     check(PrefixEntryPoint::Expr, "#[attr] ()", "#[attr] ()");
+    check(PrefixEntryPoint::Expr, "foo.0", "foo.0");
+    check(PrefixEntryPoint::Expr, "foo.0.1", "foo.0.1");
+    check(PrefixEntryPoint::Expr, "foo.0. foo", "foo.0. foo");
 }
 
 #[test]
@@ -88,6 +91,7 @@ fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) {
     for step in entry.parse(&input).iter() {
         match step {
             Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize,
+            Step::FloatSplit { .. } => n_tokens += 1,
             Step::Enter { .. } | Step::Exit | Step::Error { .. } => (),
         }
     }
diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast
index 8498724b9ef..dd27dc48964 100644
--- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast
+++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast
@@ -41,6 +41,39 @@ SOURCE_FILE
           SEMICOLON ";"
         WHITESPACE "\n    "
         EXPR_STMT
+          FIELD_EXPR
+            FIELD_EXPR
+              PATH_EXPR
+                PATH
+                  PATH_SEGMENT
+                    NAME_REF
+                      IDENT "x"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            NAME_REF
+              INT_NUMBER "1"
+          SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
+          FIELD_EXPR
+            FIELD_EXPR
+              PATH_EXPR
+                PATH
+                  PATH_SEGMENT
+                    NAME_REF
+                      IDENT "x"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            WHITESPACE " "
+            NAME_REF
+              IDENT "bar"
+          SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
           CALL_EXPR
             FIELD_EXPR
               PATH_EXPR
diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs
index b8da2ddc309..98dbe45a7ec 100644
--- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs
+++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs
@@ -1,5 +1,7 @@
 fn foo() {
     x.foo;
     x.0.bar;
+    x.0.1;
+    x.0. bar;
     x.0();
 }
diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast
index dcbcfe1231e..b28b8eb673a 100644
--- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast
+++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast
@@ -58,6 +58,49 @@ SOURCE_FILE
               COMMA ","
               R_PAREN ")"
           SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
+          METHOD_CALL_EXPR
+            FIELD_EXPR
+              FIELD_EXPR
+                PATH_EXPR
+                  PATH
+                    PATH_SEGMENT
+                      NAME_REF
+                        IDENT "x"
+                DOT "."
+                NAME_REF
+                  INT_NUMBER "0"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            NAME_REF
+              IDENT "call"
+            ARG_LIST
+              L_PAREN "("
+              R_PAREN ")"
+          SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
+          METHOD_CALL_EXPR
+            FIELD_EXPR
+              PATH_EXPR
+                PATH
+                  PATH_SEGMENT
+                    NAME_REF
+                      IDENT "x"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            WHITESPACE " "
+            NAME_REF
+              IDENT "call"
+            ARG_LIST
+              L_PAREN "("
+              R_PAREN ")"
+          SEMICOLON ";"
         WHITESPACE "\n"
         R_CURLY "}"
   WHITESPACE "\n"
diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs
index 1a3aa35ae8e..48bb6381e80 100644
--- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs
+++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs
@@ -1,4 +1,6 @@
 fn foo() {
     x.foo();
     y.bar::<T>(1, 2,);
+    x.0.0.call();
+    x.0. call();
 }
diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast
index 9d37ada0da8..af713a22072 100644
--- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast
+++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast
@@ -65,6 +65,41 @@ SOURCE_FILE
               L_PAREN "("
               R_PAREN ")"
           SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
+          AWAIT_EXPR
+            FIELD_EXPR
+              FIELD_EXPR
+                PATH_EXPR
+                  PATH
+                    PATH_SEGMENT
+                      NAME_REF
+                        IDENT "x"
+                DOT "."
+                NAME_REF
+                  INT_NUMBER "0"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            AWAIT_KW "await"
+          SEMICOLON ";"
+        WHITESPACE "\n    "
+        EXPR_STMT
+          AWAIT_EXPR
+            FIELD_EXPR
+              PATH_EXPR
+                PATH
+                  PATH_SEGMENT
+                    NAME_REF
+                      IDENT "x"
+              DOT "."
+              NAME_REF
+                INT_NUMBER "0"
+            DOT "."
+            WHITESPACE " "
+            AWAIT_KW "await"
+          SEMICOLON ";"
         WHITESPACE "\n"
         R_CURLY "}"
   WHITESPACE "\n"
diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs
index d2ba89ca607..fe9a3211bb1 100644
--- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs
+++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs
@@ -2,4 +2,6 @@ fn foo() {
     x.await;
     x.0.await;
     x.0().await?.hello();
+    x.0.0.await;
+    x.0. await;
 }