diff options
| author | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2025-01-31 15:02:41 +0100 |
|---|---|---|
| committer | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2025-02-10 10:38:22 +0100 |
| commit | 49d2d5a1161720ccd5b76ac2afbdceb6ea7e2e6e (patch) | |
| tree | a35c22c0561de96030e09c1210aedf7fcb609540 | |
| parent | c03c38d5c2368cd2aa0e056dba060b94fc747f4e (diff) | |
| download | rust-49d2d5a1161720ccd5b76ac2afbdceb6ea7e2e6e.tar.gz rust-49d2d5a1161720ccd5b76ac2afbdceb6ea7e2e6e.zip | |
Extract `unescape` from `rustc_lexer` into its own crate
| -rw-r--r-- | Cargo.lock | 7 | ||||
| -rw-r--r-- | compiler/rustc_ast/Cargo.toml | 1 | ||||
| -rw-r--r-- | compiler/rustc_ast/src/util/literal.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_parse/Cargo.toml | 1 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse_format/Cargo.toml | 1 | ||||
| -rw-r--r-- | compiler/rustc_parse_format/src/lib.rs | 11 | ||||
| -rw-r--r-- | library/literal-escaper/Cargo.toml | 10 | ||||
| -rw-r--r-- | library/literal-escaper/README.md | 4 | ||||
| -rw-r--r-- | library/literal-escaper/src/lib.rs (renamed from compiler/rustc_lexer/src/unescape.rs) | 0 | ||||
| -rw-r--r-- | library/literal-escaper/src/tests.rs (renamed from compiler/rustc_lexer/src/unescape/tests.rs) | 0 |
14 files changed, 36 insertions, 12 deletions
diff --git a/Cargo.lock b/Cargo.lock index fc4c7c9888f..63441814d4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2152,6 +2152,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] +name = "literal-escaper" +version = "0.0.1" + +[[package]] name = "lld-wrapper" version = "0.1.0" @@ -3366,6 +3370,7 @@ name = "rustc_ast" version = "0.0.0" dependencies = [ "bitflags", + "literal-escaper", "memchr", "rustc_ast_ir", "rustc_data_structures", @@ -4325,6 +4330,7 @@ name = "rustc_parse" version = "0.0.0" dependencies = [ "bitflags", + "literal-escaper", "rustc_ast", "rustc_ast_pretty", "rustc_data_structures", @@ -4347,6 +4353,7 @@ dependencies = [ name = "rustc_parse_format" version = "0.0.0" dependencies = [ + "literal-escaper", "rustc_index", "rustc_lexer", ] diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index 34c612dac69..176c2d6f8a7 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +literal-escaper = { path = "../../library/literal-escaper" } memchr = "2.7.4" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 6896ac723fa..121331ece6d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,7 @@ use std::{ascii, fmt, str}; -use rustc_lexer::unescape::{ +use literal_escaper::{ MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, }; use rustc_span::{Span, Symbol, kw, sym}; diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index c63ab77deca..334f451a39c 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -27,7 +27,6 @@ // tidy-alphabetical-end mod cursor; -pub mod unescape; #[cfg(test)] mod tests; diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index 2360914a0ab..109e1f5c093 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +literal-escaper = { path = "../../library/literal-escaper" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 792e2cc26ef..cefaf13e31f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,12 +1,12 @@ use std::ops::Range; +use literal_escaper::{self, EscapeError, Mode}; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ @@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_unicode(src, mode, &mut |span, result| { + literal_escaper::unescape_unicode(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) @@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_mixed(src, mode, &mut |span, result| { + literal_escaper::unescape_mixed(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c..e8aa400e73d 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -3,8 +3,8 @@ use std::iter::once; use std::ops::Range; +use literal_escaper::{EscapeError, Mode}; use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed}; -use rustc_lexer::unescape::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; use tracing::debug; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index e0e6c2177da..0b745217bc8 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow}; use ast::mut_visit::{self, MutVisitor}; use ast::token::IdentIsRaw; use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered}; +use literal_escaper::unescape_char; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenTree; @@ -21,7 +22,6 @@ use rustc_ast::{ use rustc_ast_pretty::pprust; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; -use rustc_lexer::unescape::unescape_char; use rustc_macros::Subdiagnostic; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index 707c4e31847..51ea46f4c9b 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start +literal-escaper = { path = "../../library/literal-escaper" } rustc_index = { path = "../rustc_index", default-features = false } rustc_lexer = { path = "../rustc_lexer" } # tidy-alphabetical-end diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 3b985621b57..7e89f9b079b 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -19,7 +19,6 @@ pub use Alignment::*; pub use Count::*; pub use Position::*; -use rustc_lexer::unescape; // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. @@ -1095,12 +1094,14 @@ fn find_width_map_from_snippet( fn unescape_string(string: &str) -> Option<String> { let mut buf = String::new(); let mut ok = true; - unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { - match unescaped_char { + literal_escaper::unescape_unicode( + string, + literal_escaper::Mode::Str, + &mut |_, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(_) => ok = false, - } - }); + }, + ); ok.then_some(buf) } diff --git a/library/literal-escaper/Cargo.toml b/library/literal-escaper/Cargo.toml new file mode 100644 index 00000000000..708fcd3cacb --- /dev/null +++ b/library/literal-escaper/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "literal-escaper" +version = "0.0.0" +edition = "2021" + +[dependencies] +std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' } + +[features] +rustc-dep-of-std = ["dep:std"] diff --git a/library/literal-escaper/README.md b/library/literal-escaper/README.md new file mode 100644 index 00000000000..5384ac4556a --- /dev/null +++ b/library/literal-escaper/README.md @@ -0,0 +1,4 @@ +# literal-escaper + +This crate provides code to unescape string literals. It is used by `rustc_lexer` +and `proc-macro`. diff --git a/compiler/rustc_lexer/src/unescape.rs b/library/literal-escaper/src/lib.rs index d6ea4249247..d6ea4249247 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/library/literal-escaper/src/lib.rs diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/library/literal-escaper/src/tests.rs index 5b99495f475..5b99495f475 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/library/literal-escaper/src/tests.rs |
