diff options
| author | bors <bors@rust-lang.org> | 2025-03-17 10:43:38 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-03-17 10:43:38 +0000 |
| commit | 9c67cecd12d79f1bbc00a74f70e7ef9fff086a5a (patch) | |
| tree | d43ca065385a4b4d3d8f1b70ba5737dbefa2a46d /compiler | |
| parent | 9bad8ac498985707f29b0bdc0293cc0457a3ab38 (diff) | |
| parent | 87b87b1966fef82e15a9f9619ee46f5e843cf4c2 (diff) | |
| download | rust-9c67cecd12d79f1bbc00a74f70e7ef9fff086a5a.tar.gz rust-9c67cecd12d79f1bbc00a74f70e7ef9fff086a5a.zip | |
Auto merge of #138595 - jhpratt:rollup-09pvfzu, r=jhpratt
Rollup of 9 pull requests Successful merges: - #136355 (Add `*_value` methods to proc_macro lib) - #137621 (Add std support to cygwin target) - #137793 (Stablize anonymous pipe) - #138341 (std: Mention clone-on-write mutation in Arc<T>) - #138517 (Improve upvar analysis for deref of child capture) - #138584 (Update Rust Foundation links in Readme) - #138586 (Document `#![register_tool]`) - #138590 (Flatten and simplify some control flow 🫓) - #138592 (update change entry for #137147) r? `@ghost` `@rustbot` modify labels: rollup
Diffstat (limited to 'compiler')
20 files changed, 92 insertions, 812 deletions
diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index 902287d0328..7f0db1560c1 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +literal-escaper = { path = "../../library/literal-escaper" } memchr = "2.7.4" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 6896ac723fa..121331ece6d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,7 @@ use std::{ascii, fmt, str}; -use rustc_lexer::unescape::{ +use literal_escaper::{ MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, }; use rustc_span::{Span, Symbol, kw, sym}; diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs index 07b3f3477a8..ed95545dea4 100644 --- a/compiler/rustc_borrowck/src/lib.rs +++ b/compiler/rustc_borrowck/src/lib.rs @@ -2479,19 +2479,15 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { let body = self.body; for local in body.mut_vars_and_args_iter().filter(|local| !self.used_mut.contains(local)) { let local_decl = &body.local_decls[local]; - let lint_root = match &body.source_scopes[local_decl.source_info.scope].local_data { - ClearCrossCrate::Set(data) => data.lint_root, - _ => continue, + let ClearCrossCrate::Set(SourceScopeLocalData { lint_root, .. }) = + body.source_scopes[local_decl.source_info.scope].local_data + else { + continue; }; // Skip over locals that begin with an underscore or have no name - match self.local_names[local] { - Some(name) => { - if name.as_str().starts_with('_') { - continue; - } - } - None => continue, + if self.local_names[local].is_none_or(|name| name.as_str().starts_with('_')) { + continue; } let span = local_decl.source_info.span; diff --git a/compiler/rustc_hir_analysis/src/check/mod.rs b/compiler/rustc_hir_analysis/src/check/mod.rs index b4a16b2b805..d8ae4214527 100644 --- a/compiler/rustc_hir_analysis/src/check/mod.rs +++ b/compiler/rustc_hir_analysis/src/check/mod.rs @@ -153,10 +153,9 @@ pub(super) fn maybe_check_static_with_link_section(tcx: TyCtxt<'_>, id: LocalDef } // If `#[link_section]` is missing, then nothing to verify - let attrs = tcx.codegen_fn_attrs(id); - if attrs.link_section.is_none() { + let Some(link_section) = tcx.codegen_fn_attrs(id).link_section else { return; - } + }; // For the wasm32 target statics with `#[link_section]` other than `.init_array` // are placed into custom sections of the final output file, but this isn't like @@ -182,11 +181,8 @@ pub(super) fn maybe_check_static_with_link_section(tcx: TyCtxt<'_>, id: LocalDef // continue to work, but would no longer be necessary. if let Ok(alloc) = tcx.eval_static_initializer(id.to_def_id()) - && alloc.inner().provenance().ptrs().len() != 0 - && attrs - .link_section - .map(|link_section| !link_section.as_str().starts_with(".init_array")) - .unwrap() + && !alloc.inner().provenance().ptrs().is_empty() + && !link_section.as_str().starts_with(".init_array") { let msg = "statics with a custom `#[link_section]` must be a \ simple list of bytes on the wasm target with no \ diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs index f19e36206a7..afbb1adf654 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs @@ -1532,30 +1532,29 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { if self.may_coerce(blk_ty, *elem_ty) && blk.stmts.is_empty() && blk.rules == hir::BlockCheckMode::DefaultBlock + && let source_map = self.tcx.sess.source_map() + && let Ok(snippet) = source_map.span_to_snippet(blk.span) + && snippet.starts_with('{') + && snippet.ends_with('}') { - let source_map = self.tcx.sess.source_map(); - if let Ok(snippet) = source_map.span_to_snippet(blk.span) { - if snippet.starts_with('{') && snippet.ends_with('}') { - diag.multipart_suggestion_verbose( - "to create an array, use square brackets instead of curly braces", - vec![ - ( - blk.span - .shrink_to_lo() - .with_hi(rustc_span::BytePos(blk.span.lo().0 + 1)), - "[".to_string(), - ), - ( - blk.span - .shrink_to_hi() - .with_lo(rustc_span::BytePos(blk.span.hi().0 - 1)), - "]".to_string(), - ), - ], - Applicability::MachineApplicable, - ); - } - } + diag.multipart_suggestion_verbose( + "to create an array, use square brackets instead of curly braces", + vec![ + ( + blk.span + .shrink_to_lo() + .with_hi(rustc_span::BytePos(blk.span.lo().0 + 1)), + "[".to_string(), + ), + ( + blk.span + .shrink_to_hi() + .with_lo(rustc_span::BytePos(blk.span.hi().0 - 1)), + "]".to_string(), + ), + ], + Applicability::MachineApplicable, + ); } } } diff --git a/compiler/rustc_hir_typeck/src/upvar.rs b/compiler/rustc_hir_typeck/src/upvar.rs index 37f3786c00a..fc98a603dd8 100644 --- a/compiler/rustc_hir_typeck/src/upvar.rs +++ b/compiler/rustc_hir_typeck/src/upvar.rs @@ -1862,8 +1862,9 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { /// /// (1.) Are we borrowing data owned by the parent closure? We can determine if /// that is the case by checking if the parent capture is by move, EXCEPT if we -/// apply a deref projection, which means we're reborrowing a reference that we -/// captured by move. +/// apply a deref projection of an immutable reference, reborrows of immutable +/// references which aren't restricted to the LUB of the lifetimes of the deref +/// chain. This is why `&'short mut &'long T` can be reborrowed as `&'long T`. /// /// ```rust /// let x = &1i32; // Let's call this lifetime `'1`. @@ -1902,10 +1903,22 @@ fn should_reborrow_from_env_of_parent_coroutine_closure<'tcx>( ) -> bool { // (1.) (!parent_capture.is_by_ref() - && !matches!( - child_capture.place.projections.get(parent_capture.place.projections.len()), - Some(Projection { kind: ProjectionKind::Deref, .. }) - )) + // This is just inlined `place.deref_tys()` but truncated to just + // the child projections. Namely, look for a `&T` deref, since we + // can always extend `&'short mut &'long T` to `&'long T`. + && !child_capture + .place + .projections + .iter() + .enumerate() + .skip(parent_capture.place.projections.len()) + .any(|(idx, proj)| { + matches!(proj.kind, ProjectionKind::Deref) + && matches!( + child_capture.place.ty_before_projection(idx).kind(), + ty::Ref(.., ty::Mutability::Not) + ) + })) // (2.) || matches!(child_capture.info.capture_kind, UpvarCapture::ByRef(ty::BorrowKind::Mutable)) } diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml index 448a50faf45..c72425fd92d 100644 --- a/compiler/rustc_lexer/Cargo.toml +++ b/compiler/rustc_lexer/Cargo.toml @@ -16,6 +16,7 @@ Rust lexer used by rustc. No stability guarantees are provided. [dependencies] memchr = "2.7.4" unicode-xid = "0.2.0" +literal-escaper = { path = "../../library/literal-escaper" } [dependencies.unicode-properties] version = "0.1.0" diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 61638e45253..c45dd33982b 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -26,11 +26,13 @@ // tidy-alphabetical-end mod cursor; -pub mod unescape; #[cfg(test)] mod tests; +// FIXME: This is needed for rust-analyzer. Remove this dependency once rust-analyzer uses +// `literal-escaper`. +pub use literal_escaper as unescape; use unicode_properties::UnicodeEmoji; pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs deleted file mode 100644 index d6ea4249247..00000000000 --- a/compiler/rustc_lexer/src/unescape.rs +++ /dev/null @@ -1,438 +0,0 @@ -//! Utilities for validating string and char literals and turning them into -//! values they represent. - -use std::ops::Range; -use std::str::Chars; - -use Mode::*; - -#[cfg(test)] -mod tests; - -/// Errors and warnings that can occur during string unescaping. They mostly -/// relate to malformed escape sequences, but there are a few that are about -/// other problems. -#[derive(Debug, PartialEq, Eq)] -pub enum EscapeError { - /// Expected 1 char, but 0 were found. - ZeroChars, - /// Expected 1 char, but more than 1 were found. - MoreThanOneChar, - - /// Escaped '\' character without continuation. - LoneSlash, - /// Invalid escape character (e.g. '\z'). - InvalidEscape, - /// Raw '\r' encountered. - BareCarriageReturn, - /// Raw '\r' encountered in raw string. - BareCarriageReturnInRawString, - /// Unescaped character that was expected to be escaped (e.g. raw '\t'). - EscapeOnlyChar, - - /// Numeric character escape is too short (e.g. '\x1'). - TooShortHexEscape, - /// Invalid character in numeric escape (e.g. '\xz') - InvalidCharInHexEscape, - /// Character code in numeric escape is non-ascii (e.g. '\xFF'). - OutOfRangeHexEscape, - - /// '\u' not followed by '{'. - NoBraceInUnicodeEscape, - /// Non-hexadecimal value in '\u{..}'. - InvalidCharInUnicodeEscape, - /// '\u{}' - EmptyUnicodeEscape, - /// No closing brace in '\u{..}', e.g. '\u{12'. - UnclosedUnicodeEscape, - /// '\u{_12}' - LeadingUnderscoreUnicodeEscape, - /// More than 6 characters in '\u{..}', e.g. '\u{10FFFF_FF}' - OverlongUnicodeEscape, - /// Invalid in-bound unicode character code, e.g. '\u{DFFF}'. - LoneSurrogateUnicodeEscape, - /// Out of bounds unicode character code, e.g. '\u{FFFFFF}'. - OutOfRangeUnicodeEscape, - - /// Unicode escape code in byte literal. - UnicodeEscapeInByte, - /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. - NonAsciiCharInByte, - - // `\0` in a C string literal. - NulInCStr, - - /// After a line ending with '\', the next line contains whitespace - /// characters that are not skipped. - UnskippedWhitespaceWarning, - - /// After a line ending with '\', multiple lines are skipped. - MultipleSkippedLinesWarning, -} - -impl EscapeError { - /// Returns true for actual errors, as opposed to warnings. - pub fn is_fatal(&self) -> bool { - !matches!( - self, - EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning - ) - } -} - -/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without -/// quotes) and produces a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. For `Char` and `Byte` modes, -/// the callback will be called exactly once. -pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range<usize>, Result<char, EscapeError>), -{ - match mode { - Char | Byte => { - let mut chars = src.chars(); - let res = unescape_char_or_byte(&mut chars, mode); - callback(0..(src.len() - chars.as_str().len()), res); - } - Str | ByteStr => unescape_non_raw_common(src, mode, callback), - RawStr | RawByteStr => check_raw_common(src, mode, callback), - RawCStr => check_raw_common(src, mode, &mut |r, mut result| { - if let Ok('\0') = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - CStr => unreachable!(), - } -} - -/// Used for mixed utf8 string literals, i.e. those that allow both unicode -/// chars and high bytes. -pub enum MixedUnit { - /// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes) - /// and Unicode chars (written directly or via `\u` escapes). - /// - /// For example, if '¥' appears in a string it is represented here as - /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte - /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]` - Char(char), - - /// Used for high bytes (`\x80`..`\xff`). - /// - /// For example, if `\xa5` appears in a string it is represented here as - /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant - /// byte string as the single byte `0xa5`. - HighByte(u8), -} - -impl From<char> for MixedUnit { - fn from(c: char) -> Self { - MixedUnit::Char(c) - } -} - -impl From<u8> for MixedUnit { - fn from(n: u8) -> Self { - if n.is_ascii() { MixedUnit::Char(n as char) } else { MixedUnit::HighByte(n) } - } -} - -/// Takes the contents of a mixed-utf8 literal (without quotes) and produces -/// a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. -pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>), -{ - match mode { - CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| { - if let Ok(MixedUnit::Char('\0')) = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(), - } -} - -/// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error. -pub fn unescape_char(src: &str) -> Result<char, EscapeError> { - unescape_char_or_byte(&mut src.chars(), Char) -} - -/// Takes a contents of a byte literal (without quotes), and returns an -/// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> { - unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char) -} - -/// What kind of literal do we parse. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Mode { - Char, - - Byte, - - Str, - RawStr, - - ByteStr, - RawByteStr, - - CStr, - RawCStr, -} - -impl Mode { - pub fn in_double_quotes(self) -> bool { - match self { - Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true, - Char | Byte => false, - } - } - - /// Are `\x80`..`\xff` allowed? - fn allow_high_bytes(self) -> bool { - match self { - Char | Str => false, - Byte | ByteStr | CStr => true, - RawStr | RawByteStr | RawCStr => unreachable!(), - } - } - - /// Are unicode (non-ASCII) chars allowed? - #[inline] - fn allow_unicode_chars(self) -> bool { - match self { - Byte | ByteStr | RawByteStr => false, - Char | Str | RawStr | CStr | RawCStr => true, - } - } - - /// Are unicode escapes (`\u`) allowed? - fn allow_unicode_escapes(self) -> bool { - match self { - Byte | ByteStr => false, - Char | Str | CStr => true, - RawByteStr | RawStr | RawCStr => unreachable!(), - } - } - - pub fn prefix_noraw(self) -> &'static str { - match self { - Char | Str | RawStr => "", - Byte | ByteStr | RawByteStr => "b", - CStr | RawCStr => "c", - } - } -} - -fn scan_escape<T: From<char> + From<u8>>( - chars: &mut Chars<'_>, - mode: Mode, -) -> Result<T, EscapeError> { - // Previous character was '\\', unescape what follows. - let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', - 'x' => { - // Parse hexadecimal character code. - - let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let value = (hi * 16 + lo) as u8; - - return if !mode.allow_high_bytes() && !value.is_ascii() { - Err(EscapeError::OutOfRangeHexEscape) - } else { - // This may be a high byte, but that will only happen if `T` is - // `MixedUnit`, because of the `allow_high_bytes` check above. - Ok(T::from(value)) - }; - } - 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from), - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(T::from(res)) -} - -fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<char, EscapeError> { - // We've parsed '\u', now we have to parse '{..}'. - - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } - - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if !allow_unicode_escapes { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or({ - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - }); - } - Some(c) => { - let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; - } - }; - } -} - -#[inline] -fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError> { - if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) } -} - -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> { - let c = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match c { - '\\' => scan_escape(chars, mode), - '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode.allow_unicode_chars()), - }?; - if chars.next().is_some() { - return Err(EscapeError::MoreThanOneChar); - } - Ok(res) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range<usize>, Result<T, EscapeError>), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here is complicated because - // `skip_ascii_whitespace` makes us to skip over chars without counting - // them in the range computation. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\\' => { - match chars.clone().next() { - Some('\n') => { - // Rust language specification requires us to skip whitespaces - // if unescaped '\' character is followed by '\n'. - // For details see [Rust language reference] - // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, &mut |range, err| { - callback(range, Err(err)) - }); - continue; - } - _ => scan_escape::<T>(&mut chars, mode), - } - } - '"' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, allow_unicode_chars).map(T::from), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) -where - F: FnMut(Range<usize>, EscapeError), -{ - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, EscapeError::MultipleSkippedLinesWarning); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().next() { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, EscapeError::UnskippedWhitespaceWarning); - } - } - *chars = tail.chars(); -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only produce errors on bare CR. -fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range<usize>, Result<char, EscapeError>), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here matches the one in - // `unescape_non_raw_common` for consistency, even though this function - // doesn't have to worry about skipping any chars. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\r' => Err(EscapeError::BareCarriageReturnInRawString), - _ => ascii_check(c, allow_unicode_chars), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -#[inline] -pub fn byte_from_char(c: char) -> u8 { - let res = c as u32; - debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr"); - res as u8 -} diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs deleted file mode 100644 index 5b99495f475..00000000000 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ /dev/null @@ -1,286 +0,0 @@ -use super::*; - -#[test] -fn test_unescape_char_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_char(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - check(r"\u{0}x", EscapeError::MoreThanOneChar); - check(r"\u{1F63b}}", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - check("\\\r", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - check(r"\xff", EscapeError::OutOfRangeHexEscape); - check(r"\xFF", EscapeError::OutOfRangeHexEscape); - check(r"\x80", EscapeError::OutOfRangeHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - - check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape); - - check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape); -} - -#[test] -fn test_unescape_char_good() { - fn check(literal_text: &str, expected_char: char) { - assert_eq!(unescape_char(literal_text), Ok(expected_char)); - } - - check("a", 'a'); - check("ы", 'ы'); - check("🦀", '🦀'); - - check(r#"\""#, '"'); - check(r"\n", '\n'); - check(r"\r", '\r'); - check(r"\t", '\t'); - check(r"\\", '\\'); - check(r"\'", '\''); - check(r"\0", '\0'); - - check(r"\x00", '\0'); - check(r"\x5a", 'Z'); - check(r"\x5A", 'Z'); - check(r"\x7f", 127 as char); - - check(r"\u{0}", '\0'); - check(r"\u{000000}", '\0'); - check(r"\u{41}", 'A'); - check(r"\u{0041}", 'A'); - check(r"\u{00_41}", 'A'); - check(r"\u{4__1__}", 'A'); - check(r"\u{1F63b}", '😻'); -} - -#[test] -fn test_unescape_str_warn() { - fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - // Check we can handle escaped newlines at the end of a file. - check("\\\n", &[]); - check("\\\n ", &[]); - - check( - "\\\n \u{a0} x", - &[ - (0..5, Err(EscapeError::UnskippedWhitespaceWarning)), - (3..5, Ok('\u{a0}')), - (5..6, Ok(' ')), - (6..7, Ok('x')), - ], - ); - check("\\\n \n x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]); -} - -#[test] -fn test_unescape_str_good() { - fn check(literal_text: &str, expected: &str) { - let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::Str, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(c), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", "foo"); - check("", ""); - check(" \t\n", " \t\n"); - - check("hello \\\n world", "hello world"); - check("thread's", "thread's") -} - -#[test] -fn test_unescape_byte_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_byte(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - - check("ы", EscapeError::NonAsciiCharInByte); - check("🦀", EscapeError::NonAsciiCharInByte); - - check(r"\u{0}", EscapeError::UnicodeEscapeInByte); - check(r"\u{000000}", EscapeError::UnicodeEscapeInByte); - check(r"\u{41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0041}", EscapeError::UnicodeEscapeInByte); - check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte); - check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{D800}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte); -} - -#[test] -fn test_unescape_byte_good() { - fn check(literal_text: &str, expected_byte: u8) { - assert_eq!(unescape_byte(literal_text), Ok(expected_byte)); - } - - check("a", b'a'); - - check(r#"\""#, b'"'); - check(r"\n", b'\n'); - check(r"\r", b'\r'); - check(r"\t", b'\t'); - check(r"\\", b'\\'); - check(r"\'", b'\''); - check(r"\0", b'\0'); - - check(r"\x00", b'\0'); - check(r"\x5a", b'Z'); - check(r"\x5A", b'Z'); - check(r"\x7f", 127); - check(r"\x80", 128); - check(r"\xff", 255); - check(r"\xFF", 255); -} - -#[test] -fn test_unescape_byte_str_good() { - fn check(literal_text: &str, expected: &[u8]) { - let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(byte_from_char(c)), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", b"foo"); - check("", b""); - check(" \t\n", b" \t\n"); - - check("hello \\\n world", b"hello world"); - check("thread's", b"thread's") -} - -#[test] -fn test_unescape_raw_str() { - fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]); -} - -#[test] -fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); -} diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 2b9113242aa..212107edb4d 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -843,9 +843,8 @@ fn analyze_attr(attr: &impl AttributeExt, state: &mut AnalyzeAttrState<'_>) -> b } } } - } else if attr.path().starts_with(&[sym::diagnostic]) && attr.path().len() == 2 { - should_encode = - rustc_feature::is_stable_diagnostic_attribute(attr.path()[1], state.features); + } else if let &[sym::diagnostic, seg] = &*attr.path() { + should_encode = rustc_feature::is_stable_diagnostic_attribute(seg, state.features); } else { should_encode = true; } diff --git a/compiler/rustc_middle/src/ty/diagnostics.rs b/compiler/rustc_middle/src/ty/diagnostics.rs index 43a1c2282e0..d3abb3d64b8 100644 --- a/compiler/rustc_middle/src/ty/diagnostics.rs +++ b/compiler/rustc_middle/src/ty/diagnostics.rs @@ -641,21 +641,19 @@ impl<'tcx> TypeVisitor<TyCtxt<'tcx>> for IsSuggestableVisitor<'tcx> { } } - Alias(Projection, AliasTy { def_id, .. }) => { - if self.tcx.def_kind(def_id) != DefKind::AssocTy { - return ControlFlow::Break(()); - } + Alias(Projection, AliasTy { def_id, .. }) + if self.tcx.def_kind(def_id) != DefKind::AssocTy => + { + return ControlFlow::Break(()); } - Param(param) => { - // FIXME: It would be nice to make this not use string manipulation, - // but it's pretty hard to do this, since `ty::ParamTy` is missing - // sufficient info to determine if it is synthetic, and we don't - // always have a convenient way of getting `ty::Generics` at the call - // sites we invoke `IsSuggestable::is_suggestable`. - if param.name.as_str().starts_with("impl ") { - return ControlFlow::Break(()); - } + // FIXME: It would be nice to make this not use string manipulation, + // but it's pretty hard to do this, since `ty::ParamTy` is missing + // sufficient info to determine if it is synthetic, and we don't + // always have a convenient way of getting `ty::Generics` at the call + // sites we invoke `IsSuggestable::is_suggestable`. + Param(param) if param.name.as_str().starts_with("impl ") => { + return ControlFlow::Break(()); } _ => {} @@ -733,17 +731,13 @@ impl<'tcx> FallibleTypeFolder<TyCtxt<'tcx>> for MakeSuggestableFolder<'tcx> { } } - Param(param) => { - // FIXME: It would be nice to make this not use string manipulation, - // but it's pretty hard to do this, since `ty::ParamTy` is missing - // sufficient info to determine if it is synthetic, and we don't - // always have a convenient way of getting `ty::Generics` at the call - // sites we invoke `IsSuggestable::is_suggestable`. - if param.name.as_str().starts_with("impl ") { - return Err(()); - } - - t + // FIXME: It would be nice to make this not use string manipulation, + // but it's pretty hard to do this, since `ty::ParamTy` is missing + // sufficient info to determine if it is synthetic, and we don't + // always have a convenient way of getting `ty::Generics` at the call + // sites we invoke `IsSuggestable::is_suggestable`. + Param(param) if param.name.as_str().starts_with("impl ") => { + return Err(()); } _ => t, diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index c9dcab0c871..e83f2d5a56d 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +literal-escaper = { path = "../../library/literal-escaper" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_data_structures = { path = "../rustc_data_structures" } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1d17290e1c7..4a3fda86c86 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,12 +1,12 @@ use std::ops::Range; +use literal_escaper::{self, EscapeError, Mode}; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ @@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_unicode(src, mode, &mut |span, result| { + literal_escaper::unescape_unicode(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) @@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_mixed(src, mode, &mut |span, result| { + literal_escaper::unescape_mixed(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c..e8aa400e73d 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -3,8 +3,8 @@ use std::iter::once; use std::ops::Range; +use literal_escaper::{EscapeError, Mode}; use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed}; -use rustc_lexer::unescape::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; use tracing::debug; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 0774324eae7..85d94400b1c 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow}; use ast::mut_visit::{self, MutVisitor}; use ast::token::{IdentIsRaw, MetaVarKind}; use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered}; +use literal_escaper::unescape_char; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenTree; @@ -21,7 +22,6 @@ use rustc_ast::{ use rustc_ast_pretty::pprust; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; -use rustc_lexer::unescape::unescape_char; use rustc_macros::Subdiagnostic; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index a39cca716d2..e63ed9e16f2 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +literal-escaper = { path = "../../library/literal-escaper" } rustc_index = { path = "../rustc_index", default-features = false } rustc_lexer = { path = "../rustc_lexer" } # tidy-alphabetical-end diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 5b8a2fe52d3..5780daf3034 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -18,7 +18,6 @@ pub use Alignment::*; pub use Count::*; pub use Position::*; -use rustc_lexer::unescape; // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. @@ -1094,12 +1093,14 @@ fn find_width_map_from_snippet( fn unescape_string(string: &str) -> Option<String> { let mut buf = String::new(); let mut ok = true; - unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { - match unescaped_char { + literal_escaper::unescape_unicode( + string, + literal_escaper::Mode::Str, + &mut |_, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(_) => ok = false, - } - }); + }, + ); ok.then_some(buf) } diff --git a/compiler/rustc_passes/src/dead.rs b/compiler/rustc_passes/src/dead.rs index 7029c60c343..d036cb74a56 100644 --- a/compiler/rustc_passes/src/dead.rs +++ b/compiler/rustc_passes/src/dead.rs @@ -1132,7 +1132,7 @@ impl<'tcx> DeadVisitor<'tcx> { return; } dead_codes.sort_by_key(|v| v.level); - for group in dead_codes[..].chunk_by(|a, b| a.level == b.level) { + for group in dead_codes.chunk_by(|a, b| a.level == b.level) { self.lint_at_single_level(&group, participle, Some(def_id), report_on); } } diff --git a/compiler/rustc_target/src/spec/targets/x86_64_pc_cygwin.rs b/compiler/rustc_target/src/spec/targets/x86_64_pc_cygwin.rs index 8da4fe6b8b1..eac4caf41c8 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_pc_cygwin.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_pc_cygwin.rs @@ -18,7 +18,7 @@ pub(crate) fn target() -> Target { description: Some("64-bit x86 Cygwin".into()), tier: Some(3), host_tools: Some(false), - std: None, + std: Some(true), }, } } |
