about summary refs log tree commit diff
diff options
context:
space:
mode:
author Lukas Wirth <lukastw97@gmail.com> 2024-07-15 14:41:35 +0200
committer Lukas Wirth <lukastw97@gmail.com> 2024-07-15 14:51:01 +0200
commit 311aaa5a792489a1f80cfa0e522e0e60b08788de (patch)
tree db6b2ebb2776a8384a7459684fd9bc58bc0eec5a
parent c5fc66928e0e9ba26253939644f75e87801ad281 (diff)
download rust-311aaa5a792489a1f80cfa0e522e0e60b08788de.tar.gz
rust-311aaa5a792489a1f80cfa0e522e0e60b08788de.zip
Fix incorrect encoding of literals in the proc-macro-api on version 4
-rw-r--r-- src/tools/rust-analyzer/Cargo.lock 3
-rw-r--r-- src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs 7
-rw-r--r-- src/tools/rust-analyzer/crates/hir-expand/src/lib.rs 2
-rw-r--r-- src/tools/rust-analyzer/crates/mbe/Cargo.toml 3
-rw-r--r-- src/tools/rust-analyzer/crates/mbe/src/lib.rs 9
-rw-r--r-- src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs 53
-rw-r--r-- src/tools/rust-analyzer/crates/proc-macro-api/Cargo.toml 2
-rw-r--r-- src/tools/rust-analyzer/crates/proc-macro-api/src/msg.rs 53
-rw-r--r-- src/tools/rust-analyzer/crates/proc-macro-api/src/msg/flat.rs 118
-rw-r--r-- src/tools/rust-analyzer/crates/proc-macro-srv/Cargo.toml 2
-rw-r--r-- src/tools/rust-analyzer/crates/tt/Cargo.toml 4
-rw-r--r-- src/tools/rust-analyzer/crates/tt/src/lib.rs 61
12 files changed, 183 insertions, 134 deletions
diff --git a/src/tools/rust-analyzer/Cargo.lock b/src/tools/rust-analyzer/Cargo.lock
index e43f712a6e2..500a150b57b 100644
--- a/src/tools/rust-analyzer/Cargo.lock
+++ b/src/tools/rust-analyzer/Cargo.lock
@@ -1046,7 +1046,6 @@ dependencies = [
  "arrayvec",
  "cov-mark",
  "parser",
- "ra-ap-rustc_lexer",
  "rustc-hash",
  "smallvec",
  "span",
@@ -1326,6 +1325,7 @@ dependencies = [
  "base-db",
  "indexmap",
  "la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "mbe",
  "paths",
  "rustc-hash",
  "serde",
@@ -2218,6 +2218,7 @@ name = "tt"
 version = "0.0.0"
 dependencies = [
  "arrayvec",
+ "ra-ap-rustc_lexer",
  "smol_str",
  "stdx",
  "text-size",
diff --git a/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs b/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
index 4fce7c1fde1..49a104fa118 100644
--- a/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
+++ b/src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
@@ -5,9 +5,10 @@ use base_db::CrateId;
 use cfg::CfgExpr;
 use either::Either;
 use intern::{sym, Interned};
+
 use mbe::{
-    desugar_doc_comment_text, syntax_node_to_token_tree, token_to_literal, DelimiterKind,
-    DocCommentDesugarMode, Punct,
+    desugar_doc_comment_text, syntax_node_to_token_tree, DelimiterKind, DocCommentDesugarMode,
+    Punct,
 };
 use smallvec::{smallvec, SmallVec};
 use span::{Span, SyntaxContextId};
@@ -20,7 +21,7 @@ use crate::{
     db::ExpandDatabase,
     mod_path::ModPath,
     span_map::SpanMapRef,
-    tt::{self, Subtree},
+    tt::{self, token_to_literal, Subtree},
     InFile,
 };
 
diff --git a/src/tools/rust-analyzer/crates/hir-expand/src/lib.rs b/src/tools/rust-analyzer/crates/hir-expand/src/lib.rs
index c4921da6100..3460d1ca3d1 100644
--- a/src/tools/rust-analyzer/crates/hir-expand/src/lib.rs
+++ b/src/tools/rust-analyzer/crates/hir-expand/src/lib.rs
@@ -59,7 +59,7 @@ pub use span::{HirFileId, MacroCallId, MacroFileId};
 
 pub mod tt {
     pub use span::Span;
-    pub use tt::{DelimiterKind, IdentIsRaw, LitKind, Spacing};
+    pub use tt::{token_to_literal, DelimiterKind, IdentIsRaw, LitKind, Spacing};
 
     pub type Delimiter = ::tt::Delimiter<Span>;
     pub type DelimSpan = ::tt::DelimSpan<Span>;
diff --git a/src/tools/rust-analyzer/crates/mbe/Cargo.toml b/src/tools/rust-analyzer/crates/mbe/Cargo.toml
index 7ce8aadfb36..1002de2104a 100644
--- a/src/tools/rust-analyzer/crates/mbe/Cargo.toml
+++ b/src/tools/rust-analyzer/crates/mbe/Cargo.toml
@@ -17,7 +17,6 @@ rustc-hash.workspace = true
 smallvec.workspace = true
 tracing.workspace = true
 arrayvec.workspace = true
-ra-ap-rustc_lexer.workspace = true
 
 # local deps
 syntax.workspace = true
@@ -30,7 +29,7 @@ span.workspace = true
 test-utils.workspace = true
 
 [features]
-in-rust-tree = ["parser/in-rust-tree", "syntax/in-rust-tree"]
+in-rust-tree = ["parser/in-rust-tree", "tt/in-rust-tree", "syntax/in-rust-tree"]
 
 [lints]
 workspace = true
diff --git a/src/tools/rust-analyzer/crates/mbe/src/lib.rs b/src/tools/rust-analyzer/crates/mbe/src/lib.rs
index 8ab9269e952..44b056a1acf 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/lib.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/lib.rs
@@ -6,13 +6,6 @@
 //! The tests for this functionality live in another crate:
 //! `hir_def::macro_expansion_tests::mbe`.
 
-#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
-
-#[cfg(not(feature = "in-rust-tree"))]
-extern crate ra_ap_rustc_lexer as rustc_lexer;
-#[cfg(feature = "in-rust-tree")]
-extern crate rustc_lexer;
-
 mod expander;
 mod parser;
 mod syntax_bridge;
@@ -36,7 +29,7 @@ pub use tt::{Delimiter, DelimiterKind, Punct};
 pub use crate::syntax_bridge::{
     desugar_doc_comment_text, parse_exprs_with_sep, parse_to_token_tree,
     parse_to_token_tree_static_span, syntax_node_to_token_tree, syntax_node_to_token_tree_modified,
-    token_to_literal, token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
+    token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
 };
 
 pub use crate::syntax_bridge::dummy_test_span_utils::*;
diff --git a/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs b/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
index 3feddba2106..4d66464932b 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
@@ -4,7 +4,7 @@ use std::fmt;
 
 use rustc_hash::{FxHashMap, FxHashSet};
 use span::{Edition, SpanAnchor, SpanData, SpanMap};
-use stdx::{format_to, itertools::Itertools, never, non_empty_vec::NonEmptyVec};
+use stdx::{format_to, never, non_empty_vec::NonEmptyVec};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
     format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
@@ -14,6 +14,7 @@ use syntax::{
 use tt::{
     buffer::{Cursor, TokenBuffer},
     iter::TtIter,
+    token_to_literal,
 };
 
 use crate::to_parser_input::to_parser_input;
@@ -400,56 +401,6 @@ where
     }
 }
 
-pub fn token_to_literal<S>(text: SmolStr, span: S) -> tt::Literal<S>
-where
-    S: Copy,
-{
-    use rustc_lexer::LiteralKind;
-
-    let token = rustc_lexer::tokenize(&text).next_tuple();
-    let Some((rustc_lexer::Token {
-        kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
-        ..
-    },)) = token
-    else {
-        return tt::Literal { span, text, kind: tt::LitKind::Err(()), suffix: None };
-    };
-
-    let (kind, start_offset, end_offset) = match kind {
-        LiteralKind::Int { .. } => (tt::LitKind::Integer, 0, 0),
-        LiteralKind::Float { .. } => (tt::LitKind::Float, 0, 0),
-        LiteralKind::Char { terminated } => (tt::LitKind::Char, 1, terminated as usize),
-        LiteralKind::Byte { terminated } => (tt::LitKind::Byte, 2, terminated as usize),
-        LiteralKind::Str { terminated } => (tt::LitKind::Str, 1, terminated as usize),
-        LiteralKind::ByteStr { terminated } => (tt::LitKind::ByteStr, 2, terminated as usize),
-        LiteralKind::CStr { terminated } => (tt::LitKind::CStr, 2, terminated as usize),
-        LiteralKind::RawStr { n_hashes } => (
-            tt::LitKind::StrRaw(n_hashes.unwrap_or_default()),
-            2 + n_hashes.unwrap_or_default() as usize,
-            1 + n_hashes.unwrap_or_default() as usize,
-        ),
-        LiteralKind::RawByteStr { n_hashes } => (
-            tt::LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
-            3 + n_hashes.unwrap_or_default() as usize,
-            1 + n_hashes.unwrap_or_default() as usize,
-        ),
-        LiteralKind::RawCStr { n_hashes } => (
-            tt::LitKind::CStrRaw(n_hashes.unwrap_or_default()),
-            3 + n_hashes.unwrap_or_default() as usize,
-            1 + n_hashes.unwrap_or_default() as usize,
-        ),
-    };
-
-    let (lit, suffix) = text.split_at(suffix_start as usize);
-    let lit = &lit[start_offset..lit.len() - end_offset];
-    let suffix = match suffix {
-        "" | "_" => None,
-        suffix => Some(Box::new(suffix.into())),
-    };
-
-    tt::Literal { span, text: lit.into(), kind, suffix }
-}
-
 fn is_single_token_op(kind: SyntaxKind) -> bool {
     matches!(
         kind,
diff --git a/src/tools/rust-analyzer/crates/proc-macro-api/Cargo.toml b/src/tools/rust-analyzer/crates/proc-macro-api/Cargo.toml
index 7f633d91ecc..889eefa8b5c 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-api/Cargo.toml
+++ b/src/tools/rust-analyzer/crates/proc-macro-api/Cargo.toml
@@ -28,6 +28,8 @@ span.workspace = true
 # InternIds for the syntax context
 base-db.workspace = true
 la-arena.workspace = true
+# only here to parse via token_to_literal
+mbe.workspace = true
 
 [lints]
 workspace = true
diff --git a/src/tools/rust-analyzer/crates/proc-macro-api/src/msg.rs b/src/tools/rust-analyzer/crates/proc-macro-api/src/msg.rs
index b5f3d0c3aac..65835048173 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-api/src/msg.rs
+++ b/src/tools/rust-analyzer/crates/proc-macro-api/src/msg.rs
@@ -197,7 +197,7 @@ mod tests {
                 .into(),
             ),
             TokenTree::Leaf(Leaf::Literal(Literal {
-                text: "\"Foo\"".into(),
+                text: "Foo".into(),
                 span: Span {
                     range: TextRange::at(TextSize::new(10), TextSize::of("\"Foo\"")),
                     anchor,
@@ -263,32 +263,35 @@ mod tests {
     #[test]
     fn test_proc_macro_rpc_works() {
         let tt = fixture_token_tree();
-        let mut span_data_table = Default::default();
-        let task = ExpandMacro {
-            data: ExpandMacroData {
-                macro_body: FlatTree::new(&tt, CURRENT_API_VERSION, &mut span_data_table),
-                macro_name: Default::default(),
-                attributes: None,
-                has_global_spans: ExpnGlobals {
-                    serialize: true,
-                    def_site: 0,
-                    call_site: 0,
-                    mixed_site: 0,
+        for v in RUST_ANALYZER_SPAN_SUPPORT..=CURRENT_API_VERSION {
+            let mut span_data_table = Default::default();
+            let task = ExpandMacro {
+                data: ExpandMacroData {
+                    macro_body: FlatTree::new(&tt, v, &mut span_data_table),
+                    macro_name: Default::default(),
+                    attributes: None,
+                    has_global_spans: ExpnGlobals {
+                        serialize: true,
+                        def_site: 0,
+                        call_site: 0,
+                        mixed_site: 0,
+                    },
+                    span_data_table: Vec::new(),
                 },
-                span_data_table: Vec::new(),
-            },
-            lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(),
-            env: Default::default(),
-            current_dir: Default::default(),
-        };
+                lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(),
+                env: Default::default(),
+                current_dir: Default::default(),
+            };
 
-        let json = serde_json::to_string(&task).unwrap();
-        // println!("{}", json);
-        let back: ExpandMacro = serde_json::from_str(&json).unwrap();
+            let json = serde_json::to_string(&task).unwrap();
+            // println!("{}", json);
+            let back: ExpandMacro = serde_json::from_str(&json).unwrap();
 
-        assert_eq!(
-            tt,
-            back.data.macro_body.to_subtree_resolved(CURRENT_API_VERSION, &span_data_table)
-        );
+            assert_eq!(
+                tt,
+                back.data.macro_body.to_subtree_resolved(v, &span_data_table),
+                "version: {v}"
+            );
+        }
     }
 }
diff --git a/src/tools/rust-analyzer/crates/proc-macro-api/src/msg/flat.rs b/src/tools/rust-analyzer/crates/proc-macro-api/src/msg/flat.rs
index 7f5afdb7270..3d962e99d92 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-api/src/msg/flat.rs
+++ b/src/tools/rust-analyzer/crates/proc-macro-api/src/msg/flat.rs
@@ -141,6 +141,7 @@ impl FlatTree {
             ident: Vec::new(),
             token_tree: Vec::new(),
             text: Vec::new(),
+            version,
         };
         w.write(subtree);
 
@@ -178,6 +179,7 @@ impl FlatTree {
             ident: Vec::new(),
             token_tree: Vec::new(),
             text: Vec::new(),
+            version,
         };
         w.write(subtree);
 
@@ -228,6 +230,7 @@ impl FlatTree {
             token_tree: self.token_tree,
             text: self.text,
             span_data_table,
+            version,
         }
         .read()
     }
@@ -253,6 +256,7 @@ impl FlatTree {
             token_tree: self.token_tree,
             text: self.text,
             span_data_table: &(),
+            version,
         }
         .read()
     }
@@ -386,8 +390,9 @@ impl InternableSpan for Span {
 
 struct Writer<'a, 'span, S: InternableSpan> {
     work: VecDeque<(usize, &'a tt::Subtree<S>)>,
-    string_table: FxHashMap<&'a str, u32>,
+    string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
     span_data_table: &'span mut S::Table,
+    version: u32,
 
     subtree: Vec<SubtreeRepr>,
     literal: Vec<LiteralRepr>,
@@ -425,9 +430,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
                 tt::TokenTree::Leaf(leaf) => match leaf {
                     tt::Leaf::Literal(lit) => {
                         let idx = self.literal.len() as u32;
-                        let text = self.intern(&lit.text);
                         let id = self.token_id_of(lit.span);
-                        let suffix = lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0);
+                        let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
+                            (
+                                self.intern(&lit.text),
+                                lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0),
+                            )
+                        } else {
+                            (self.intern_owned(format!("{lit}")), !0)
+                        };
                         self.literal.push(LiteralRepr {
                             id,
                             text,
@@ -456,13 +467,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
                     }
                     tt::Leaf::Ident(ident) => {
                         let idx = self.ident.len() as u32;
-                        let text = self.intern(&ident.text);
                         let id = self.token_id_of(ident.span);
-                        self.ident.push(IdentRepr {
-                            id,
-                            text,
-                            is_raw: ident.is_raw == tt::IdentIsRaw::Yes,
-                        });
+                        let text = if self.version >= EXTENDED_LEAF_DATA {
+                            self.intern(&ident.text)
+                        } else if ident.is_raw.yes() {
+                            self.intern_owned(format!("r#{}", ident.text,))
+                        } else {
+                            self.intern(&ident.text)
+                        };
+                        self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
                         idx << 2 | 0b11
                     }
                 },
@@ -484,15 +497,25 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
 
     pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
         let table = &mut self.text;
-        *self.string_table.entry(text).or_insert_with(|| {
+        *self.string_table.entry(text.into()).or_insert_with(|| {
             let idx = table.len();
             table.push(text.to_owned());
             idx as u32
         })
     }
+
+    pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
+        let table = &mut self.text;
+        *self.string_table.entry(text.clone().into()).or_insert_with(|| {
+            let idx = table.len();
+            table.push(text);
+            idx as u32
+        })
+    }
 }
 
 struct Reader<'span, S: InternableSpan> {
+    version: u32,
     subtree: Vec<SubtreeRepr>,
     literal: Vec<LiteralRepr>,
     punct: Vec<PunctRepr>,
@@ -528,30 +551,36 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
                             0b01 => {
                                 use tt::LitKind::*;
                                 let repr = &self.literal[idx];
-                                tt::Leaf::Literal(tt::Literal {
-                                    text: self.text[repr.text as usize].as_str().into(),
-                                    span: read_span(repr.id),
-                                    kind: match u16::to_le_bytes(repr.kind) {
-                                        [0, _] => Err(()),
-                                        [1, _] => Byte,
-                                        [2, _] => Char,
-                                        [3, _] => Integer,
-                                        [4, _] => Float,
-                                        [5, _] => Str,
-                                        [6, r] => StrRaw(r),
-                                        [7, _] => ByteStr,
-                                        [8, r] => ByteStrRaw(r),
-                                        [9, _] => CStr,
-                                        [10, r] => CStrRaw(r),
-                                        _ => unreachable!(),
-                                    },
-                                    suffix: if repr.suffix != !0 {
-                                        Some(Box::new(
-                                            self.text[repr.suffix as usize].as_str().into(),
-                                        ))
-                                    } else {
-                                        None
-                                    },
+                                let text = self.text[repr.text as usize].as_str();
+                                let span = read_span(repr.id);
+                                tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
+                                    tt::Literal {
+                                        text: text.into(),
+                                        span,
+                                        kind: match u16::to_le_bytes(repr.kind) {
+                                            [0, _] => Err(()),
+                                            [1, _] => Byte,
+                                            [2, _] => Char,
+                                            [3, _] => Integer,
+                                            [4, _] => Float,
+                                            [5, _] => Str,
+                                            [6, r] => StrRaw(r),
+                                            [7, _] => ByteStr,
+                                            [8, r] => ByteStrRaw(r),
+                                            [9, _] => CStr,
+                                            [10, r] => CStrRaw(r),
+                                            _ => unreachable!(),
+                                        },
+                                        suffix: if repr.suffix != !0 {
+                                            Some(Box::new(
+                                                self.text[repr.suffix as usize].as_str().into(),
+                                            ))
+                                        } else {
+                                            None
+                                        },
+                                    }
+                                } else {
+                                    tt::token_to_literal(text.into(), span)
                                 })
                                 .into()
                             }
@@ -566,14 +595,23 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
                             }
                             0b11 => {
                                 let repr = &self.ident[idx];
+                                let text = self.text[repr.text as usize].as_str();
+                                let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
+                                    (
+                                        if repr.is_raw {
+                                            tt::IdentIsRaw::Yes
+                                        } else {
+                                            tt::IdentIsRaw::No
+                                        },
+                                        text,
+                                    )
+                                } else {
+                                    tt::IdentIsRaw::split_from_symbol(text)
+                                };
                                 tt::Leaf::Ident(tt::Ident {
-                                    text: self.text[repr.text as usize].as_str().into(),
+                                    text: text.into(),
                                     span: read_span(repr.id),
-                                    is_raw: if repr.is_raw {
-                                        tt::IdentIsRaw::Yes
-                                    } else {
-                                        tt::IdentIsRaw::No
-                                    },
+                                    is_raw,
                                 })
                                 .into()
                             }
diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/Cargo.toml b/src/tools/rust-analyzer/crates/proc-macro-srv/Cargo.toml
index 735f781c439..065701c05cc 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-srv/Cargo.toml
+++ b/src/tools/rust-analyzer/crates/proc-macro-srv/Cargo.toml
@@ -34,7 +34,7 @@ proc-macro-test.path = "./proc-macro-test"
 
 [features]
 sysroot-abi = []
-in-rust-tree = ["mbe/in-rust-tree", "sysroot-abi"]
+in-rust-tree = ["mbe/in-rust-tree", "tt/in-rust-tree","sysroot-abi"]
 
 [lints]
 workspace = true
diff --git a/src/tools/rust-analyzer/crates/tt/Cargo.toml b/src/tools/rust-analyzer/crates/tt/Cargo.toml
index 1311e2ddf89..1900635b995 100644
--- a/src/tools/rust-analyzer/crates/tt/Cargo.toml
+++ b/src/tools/rust-analyzer/crates/tt/Cargo.toml
@@ -17,6 +17,10 @@ smol_str.workspace = true
 text-size.workspace = true
 
 stdx.workspace = true
+ra-ap-rustc_lexer.workspace = true
+
+[features]
+in-rust-tree = []
 
 [lints]
 workspace = true
diff --git a/src/tools/rust-analyzer/crates/tt/src/lib.rs b/src/tools/rust-analyzer/crates/tt/src/lib.rs
index 24fd0abada5..c328b3f8a3c 100644
--- a/src/tools/rust-analyzer/crates/tt/src/lib.rs
+++ b/src/tools/rust-analyzer/crates/tt/src/lib.rs
@@ -2,14 +2,21 @@
 //! input and output) of macros. It closely mirrors `proc_macro` crate's
 //! `TokenTree`.
 
+#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
+
+#[cfg(not(feature = "in-rust-tree"))]
+extern crate ra_ap_rustc_lexer as rustc_lexer;
+#[cfg(feature = "in-rust-tree")]
+extern crate rustc_lexer;
+
 pub mod buffer;
 pub mod iter;
 
 use std::fmt;
 
-use stdx::impl_from;
+use stdx::{impl_from, itertools::Itertools as _};
 
-pub use smol_str::SmolStr;
+pub use smol_str::{format_smolstr, SmolStr};
 pub use text_size::{TextRange, TextSize};
 
 #[derive(Clone, PartialEq, Debug)]
@@ -196,6 +203,56 @@ pub struct Literal<S> {
     pub suffix: Option<Box<SmolStr>>,
 }
 
+pub fn token_to_literal<S>(text: SmolStr, span: S) -> Literal<S>
+where
+    S: Copy,
+{
+    use rustc_lexer::LiteralKind;
+
+    let token = rustc_lexer::tokenize(&text).next_tuple();
+    let Some((rustc_lexer::Token {
+        kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
+        ..
+    },)) = token
+    else {
+        return Literal { span, text, kind: LitKind::Err(()), suffix: None };
+    };
+
+    let (kind, start_offset, end_offset) = match kind {
+        LiteralKind::Int { .. } => (LitKind::Integer, 0, 0),
+        LiteralKind::Float { .. } => (LitKind::Float, 0, 0),
+        LiteralKind::Char { terminated } => (LitKind::Char, 1, terminated as usize),
+        LiteralKind::Byte { terminated } => (LitKind::Byte, 2, terminated as usize),
+        LiteralKind::Str { terminated } => (LitKind::Str, 1, terminated as usize),
+        LiteralKind::ByteStr { terminated } => (LitKind::ByteStr, 2, terminated as usize),
+        LiteralKind::CStr { terminated } => (LitKind::CStr, 2, terminated as usize),
+        LiteralKind::RawStr { n_hashes } => (
+            LitKind::StrRaw(n_hashes.unwrap_or_default()),
+            2 + n_hashes.unwrap_or_default() as usize,
+            1 + n_hashes.unwrap_or_default() as usize,
+        ),
+        LiteralKind::RawByteStr { n_hashes } => (
+            LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
+            3 + n_hashes.unwrap_or_default() as usize,
+            1 + n_hashes.unwrap_or_default() as usize,
+        ),
+        LiteralKind::RawCStr { n_hashes } => (
+            LitKind::CStrRaw(n_hashes.unwrap_or_default()),
+            3 + n_hashes.unwrap_or_default() as usize,
+            1 + n_hashes.unwrap_or_default() as usize,
+        ),
+    };
+
+    let (lit, suffix) = text.split_at(suffix_start as usize);
+    let lit = &lit[start_offset..lit.len() - end_offset];
+    let suffix = match suffix {
+        "" | "_" => None,
+        suffix => Some(Box::new(suffix.into())),
+    };
+
+    Literal { span, text: lit.into(), kind, suffix }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub struct Punct<S> {
     pub char: char,