5 files changed, 74 insertions, 259 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
index fba20566580..141d261b5f0 100644
--- a/compiler/rustc_span/src/analyze_source_file.rs
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -29,165 +29,6 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi
     (lines, multi_byte_chars)
 }
 
-#[cfg(bootstrap)]
-cfg_match! {
-    cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
-        fn analyze_source_file_dispatch(
-            src: &str,
-            lines: &mut Vec<RelativeBytePos>,
-            multi_byte_chars: &mut Vec<MultiByteChar>,
-        ) {
-            if is_x86_feature_detected!("sse2") {
-                unsafe {
-                    analyze_source_file_sse2(src, lines, multi_byte_chars);
-                }
-            } else {
-                analyze_source_file_generic(
-                    src,
-                    src.len(),
-                    RelativeBytePos::from_u32(0),
-                    lines,
-                    multi_byte_chars,
-                );
-            }
-        }
-
-        /// Checks 16 byte chunks of text at a time. If the chunk contains
-        /// something other than printable ASCII characters and newlines, the
-        /// function falls back to the generic implementation. Otherwise it uses
-        /// SSE2 intrinsics to quickly find all newlines.
-        #[target_feature(enable = "sse2")]
-        unsafe fn analyze_source_file_sse2(
-            src: &str,
-            lines: &mut Vec<RelativeBytePos>,
-            multi_byte_chars: &mut Vec<MultiByteChar>,
-        ) {
-            #[cfg(target_arch = "x86")]
-            use std::arch::x86::*;
-            #[cfg(target_arch = "x86_64")]
-            use std::arch::x86_64::*;
-
-            const CHUNK_SIZE: usize = 16;
-
-            let src_bytes = src.as_bytes();
-
-            let chunk_count = src.len() / CHUNK_SIZE;
-
-            // This variable keeps track of where we should start decoding a
-            // chunk. If a multi-byte character spans across chunk boundaries,
-            // we need to skip that part in the next chunk because we already
-            // handled it.
-            let mut intra_chunk_offset = 0;
-
-            for chunk_index in 0..chunk_count {
-                let ptr = src_bytes.as_ptr() as *const __m128i;
-                // We don't know if the pointer is aligned to 16 bytes, so we
-                // use `loadu`, which supports unaligned loading.
-                let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
-
-                // For character in the chunk, see if its byte value is < 0, which
-                // indicates that it's part of a UTF-8 char.
-                let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
-                // Create a bit mask from the comparison results.
-                let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
-
-                // If the bit mask is all zero, we only have ASCII chars here:
-                if multibyte_mask == 0 {
-                    assert!(intra_chunk_offset == 0);
-
-                    // Check if there are any control characters in the chunk. All
-                    // control characters that we can encounter at this point have a
-                    // byte value less than 32 or ...
-                    let control_char_test0 = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)) };
-                    let control_char_mask0 = unsafe { _mm_movemask_epi8(control_char_test0) };
-
-                    // ... it's the ASCII 'DEL' character with a value of 127.
-                    let control_char_test1 = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)) };
-                    let control_char_mask1 = unsafe { _mm_movemask_epi8(control_char_test1) };
-
-                    let control_char_mask = control_char_mask0 | control_char_mask1;
-
-                    if control_char_mask != 0 {
-                        // Check for newlines in the chunk
-                        let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
-                        let newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
-
-                        if control_char_mask == newlines_mask {
-                            // All control characters are newlines, record them
-                            let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
-                            let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
-
-                            loop {
-                                let index = newlines_mask.trailing_zeros();
-
-                                if index >= CHUNK_SIZE as u32 {
-                                    // We have arrived at the end of the chunk.
-                                    break;
-                                }
-
-                                lines.push(RelativeBytePos(index) + output_offset);
-
-                                // Clear the bit, so we can find the next one.
-                                newlines_mask &= (!1) << index;
-                            }
-
-                            // We are done for this chunk. All control characters were
-                            // newlines and we took care of those.
-                            continue;
-                        } else {
-                            // Some of the control characters are not newlines,
-                            // fall through to the slow path below.
-                        }
-                    } else {
-                        // No control characters, nothing to record for this chunk
-                        continue;
-                    }
-                }
-
-                // The slow path.
-                // There are control chars in here, fallback to generic decoding.
-                let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
-                intra_chunk_offset = analyze_source_file_generic(
-                    &src[scan_start..],
-                    CHUNK_SIZE - intra_chunk_offset,
-                    RelativeBytePos::from_usize(scan_start),
-                    lines,
-                    multi_byte_chars,
-                );
-            }
-
-            // There might still be a tail left to analyze
-            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
-            if tail_start < src.len() {
-                analyze_source_file_generic(
-                    &src[tail_start..],
-                    src.len() - tail_start,
-                    RelativeBytePos::from_usize(tail_start),
-                    lines,
-                    multi_byte_chars,
-                );
-            }
-        }
-    }
-    _ => {
-        // The target (or compiler version) does not support SSE2 ...
-        fn analyze_source_file_dispatch(
-            src: &str,
-            lines: &mut Vec<RelativeBytePos>,
-            multi_byte_chars: &mut Vec<MultiByteChar>,
-        ) {
-            analyze_source_file_generic(
-                src,
-                src.len(),
-                RelativeBytePos::from_u32(0),
-                lines,
-                multi_byte_chars,
-            );
-        }
-    }
-}
-
-#[cfg(not(bootstrap))]
 cfg_match! {
     any(target_arch = "x86", target_arch = "x86_64") => {
         fn analyze_source_file_dispatch(
@@ -227,9 +68,7 @@ cfg_match! {
 
             const CHUNK_SIZE: usize = 16;
 
-            let src_bytes = src.as_bytes();
-
-            let chunk_count = src.len() / CHUNK_SIZE;
+            let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
 
             // This variable keeps track of where we should start decoding a
             // chunk. If a multi-byte character spans across chunk boundaries,
@@ -237,11 +76,10 @@ cfg_match! {
             // handled it.
             let mut intra_chunk_offset = 0;
 
-            for chunk_index in 0..chunk_count {
-                let ptr = src_bytes.as_ptr() as *const __m128i;
+            for (chunk_index, chunk) in chunks.iter().enumerate() {
                 // We don't know if the pointer is aligned to 16 bytes, so we
                 // use `loadu`, which supports unaligned loading.
-                let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
+                let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
 
                 // For character in the chunk, see if its byte value is < 0, which
                 // indicates that it's part of a UTF-8 char.
@@ -253,69 +91,36 @@ cfg_match! {
                 if multibyte_mask == 0 {
                     assert!(intra_chunk_offset == 0);
 
-                    // Check if there are any control characters in the chunk. All
-                    // control characters that we can encounter at this point have a
-                    // byte value less than 32 or ...
-                    let control_char_test0 = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)) };
-                    let control_char_mask0 = unsafe { _mm_movemask_epi8(control_char_test0) };
-
-                    // ... it's the ASCII 'DEL' character with a value of 127.
-                    let control_char_test1 = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)) };
-                    let control_char_mask1 = unsafe { _mm_movemask_epi8(control_char_test1) };
-
-                    let control_char_mask = control_char_mask0 | control_char_mask1;
-
-                    if control_char_mask != 0 {
-                        // Check for newlines in the chunk
-                        let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
-                        let newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
+                    // Check for newlines in the chunk
+                    let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
+                    let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
 
-                        if control_char_mask == newlines_mask {
-                            // All control characters are newlines, record them
-                            let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
-                            let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
+                    let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
 
-                            loop {
-                                let index = newlines_mask.trailing_zeros();
+                    while newlines_mask != 0 {
+                        let index = newlines_mask.trailing_zeros();
 
-                                if index >= CHUNK_SIZE as u32 {
-                                    // We have arrived at the end of the chunk.
-                                    break;
-                                }
+                        lines.push(RelativeBytePos(index) + output_offset);
 
-                                lines.push(RelativeBytePos(index) + output_offset);
-
-                                // Clear the bit, so we can find the next one.
-                                newlines_mask &= (!1) << index;
-                            }
-
-                            // We are done for this chunk. All control characters were
-                            // newlines and we took care of those.
-                            continue;
-                        } else {
-                            // Some of the control characters are not newlines,
-                            // fall through to the slow path below.
-                        }
-                    } else {
-                        // No control characters, nothing to record for this chunk
-                        continue;
+                        // Clear the bit, so we can find the next one.
+                        newlines_mask &= newlines_mask - 1;
                     }
+                } else {
+                    // The slow path.
+                    // There are multibyte chars in here, fallback to generic decoding.
+                    let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
+                    intra_chunk_offset = analyze_source_file_generic(
+                        &src[scan_start..],
+                        CHUNK_SIZE - intra_chunk_offset,
+                        RelativeBytePos::from_usize(scan_start),
+                        lines,
+                        multi_byte_chars,
+                    );
                 }
-
-                // The slow path.
-                // There are control chars in here, fallback to generic decoding.
-                let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
-                intra_chunk_offset = analyze_source_file_generic(
-                    &src[scan_start..],
-                    CHUNK_SIZE - intra_chunk_offset,
-                    RelativeBytePos::from_usize(scan_start),
-                    lines,
-                    multi_byte_chars,
-                );
             }
 
             // There might still be a tail left to analyze
-            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+            let tail_start = src.len() - tail.len() + intra_chunk_offset;
             if tail_start < src.len() {
                 analyze_source_file_generic(
                     &src[tail_start..],
@@ -369,29 +174,18 @@ fn analyze_source_file_generic(
         // string.
         let mut char_len = 1;
 
-        if byte < 32 {
-            // This is an ASCII control character, it could be one of the cases
-            // that are interesting to us.
-
+        if byte == b'\n' {
             let pos = RelativeBytePos::from_usize(i) + output_offset;
-
-            if let b'\n' = byte {
-                lines.push(pos + RelativeBytePos(1));
-            }
-        } else if byte >= 127 {
-            // The slow path:
-            // This is either ASCII control character "DEL" or the beginning of
-            // a multibyte char. Just decode to `char`.
+            lines.push(pos + RelativeBytePos(1));
+        } else if byte >= 128 {
+            // This is the beginning of a multibyte char. Just decode to `char`.
             let c = src[i..].chars().next().unwrap();
             char_len = c.len_utf8();
 
             let pos = RelativeBytePos::from_usize(i) + output_offset;
-
-            if char_len > 1 {
-                assert!((2..=4).contains(&char_len));
-                let mbc = MultiByteChar { pos, bytes: char_len as u8 };
-                multi_byte_chars.push(mbc);
-            }
+            assert!((2..=4).contains(&char_len));
+            let mbc = MultiByteChar { pos, bytes: char_len as u8 };
+            multi_byte_chars.push(mbc);
         }
 
         i += char_len;
diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs
index f61ce37131e..641bac88ad0 100644
--- a/compiler/rustc_span/src/def_id.rs
+++ b/compiler/rustc_span/src/def_id.rs
@@ -3,10 +3,9 @@ use std::hash::{BuildHasherDefault, Hash, Hasher};
 
 use rustc_data_structures::AtomicRef;
 use rustc_data_structures::fingerprint::Fingerprint;
-use rustc_data_structures::stable_hasher::{
-    Hash64, HashStable, StableHasher, StableOrd, ToStableHashKey,
-};
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher, StableOrd, ToStableHashKey};
 use rustc_data_structures::unhash::Unhasher;
+use rustc_hashes::Hash64;
 use rustc_index::Idx;
 use rustc_macros::{Decodable, Encodable, HashStable_Generic};
 use rustc_serialize::{Decodable, Encodable};
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs
index 2910bcdf51d..84e89ff4b7d 100644
--- a/compiler/rustc_span/src/hygiene.rs
+++ b/compiler/rustc_span/src/hygiene.rs
@@ -33,9 +33,10 @@ use std::sync::Arc;
 
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
-use rustc_data_structures::stable_hasher::{Hash64, HashStable, HashingControls, StableHasher};
+use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
 use rustc_data_structures::sync::{Lock, WorkerLocal};
 use rustc_data_structures::unhash::UnhashMap;
+use rustc_hashes::Hash64;
 use rustc_index::IndexVec;
 use rustc_macros::{Decodable, Encodable, HashStable_Generic};
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
@@ -174,6 +175,12 @@ pub enum Transparency {
     Opaque,
 }
 
+impl Transparency {
+    pub fn fallback(macro_rules: bool) -> Self {
+        if macro_rules { Transparency::SemiTransparent } else { Transparency::Opaque }
+    }
+}
+
 impl LocalExpnId {
     /// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST.
     pub const ROOT: LocalExpnId = LocalExpnId::ZERO;
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index 0e146baef37..bca9323a50d 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -31,6 +31,7 @@
 #![feature(round_char_boundary)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_chunks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
 
@@ -87,9 +88,10 @@ use std::sync::Arc;
 use std::{fmt, iter};
 
 use md5::{Digest, Md5};
-use rustc_data_structures::stable_hasher::{Hash64, Hash128, HashStable, StableHasher};
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{FreezeLock, FreezeWriteGuard, Lock};
 use rustc_data_structures::unord::UnordMap;
+use rustc_hashes::{Hash64, Hash128};
 use sha1::Sha1;
 use sha2::Sha256;
 
@@ -403,7 +405,7 @@ impl fmt::Display for FileNameDisplay<'_> {
 impl<'a> FileNameDisplay<'a> {
     pub fn to_string_lossy(&self) -> Cow<'a, str> {
         match self.inner {
-            FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
+            FileName::Real(inner) => inner.to_string_lossy(self.display_pref),
             _ => Cow::from(self.to_string()),
         }
     }
@@ -1441,7 +1443,7 @@ pub enum ExternalSourceKind {
 impl ExternalSource {
     pub fn get_source(&self) -> Option<&str> {
         match self {
-            ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src),
+            ExternalSource::Foreign { kind: ExternalSourceKind::Present(src), .. } => Some(src),
             _ => None,
         }
     }
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index c819d433235..172c2faca96 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -104,6 +104,8 @@ symbols! {
         Gen:                "gen", // >= 2024 Edition only
         Try:                "try", // >= 2018 Edition only
 
+        // NOTE: When adding new keywords, consider adding them to the ui/parser/raw/raw-idents.rs test.
+
         // "Lifetime keywords": regular keywords with a leading `'`.
         // Matching predicates: `is_any_keyword`
         UnderscoreLifetime: "'_",
@@ -190,9 +192,11 @@ symbols! {
         Capture,
         Cell,
         Center,
+        Child,
         Cleanup,
         Clone,
         CoercePointee,
+        CoercePointeeValidated,
         CoerceUnsized,
         Command,
         ConstParamTy,
@@ -249,6 +253,7 @@ symbols! {
         Into,
         IntoFuture,
         IntoIterator,
+        IoBufRead,
         IoLines,
         IoRead,
         IoSeek,
@@ -284,6 +289,7 @@ symbols! {
         OsString,
         Output,
         Param,
+        ParamSet,
         PartialEq,
         PartialOrd,
         Path,
@@ -332,6 +338,7 @@ symbols! {
         SliceIter,
         Some,
         SpanCtxt,
+        Stdin,
         String,
         StructuralPartialEq,
         SubdiagMessage,
@@ -514,6 +521,8 @@ symbols! {
         bang,
         begin_panic,
         bench,
+        bevy_ecs,
+        bikeshed_guaranteed_no_drop,
         bin,
         binaryheap_iter,
         bind_by_move_pattern_guards,
@@ -594,6 +603,9 @@ symbols! {
         cfi,
         cfi_encoding,
         char,
+        char_is_ascii,
+        child_id,
+        child_kill,
         client,
         clippy,
         clobber_abi,
@@ -619,6 +631,7 @@ symbols! {
         cmp_partialord_lt,
         cmpxchg16b_target_feature,
         cmse_nonsecure_entry,
+        coerce_pointee_validated,
         coerce_unsized,
         cold,
         cold_path,
@@ -877,6 +890,7 @@ symbols! {
         extern_crate_self,
         extern_in_paths,
         extern_prelude,
+        extern_system_varargs,
         extern_types,
         external,
         external_doc,
@@ -1024,6 +1038,7 @@ symbols! {
         generic_const_exprs,
         generic_const_items,
         generic_param_attrs,
+        generic_pattern_types,
         get_context,
         global_alloc_ty,
         global_allocator,
@@ -1352,10 +1367,6 @@ symbols! {
         native_link_modifiers_whole_archive,
         natvis_file,
         ne,
-        nearbyintf128,
-        nearbyintf16,
-        nearbyintf32,
-        nearbyintf64,
         needs_allocator,
         needs_drop,
         needs_panic_runtime,
@@ -1460,6 +1471,7 @@ symbols! {
         panic_2015,
         panic_2021,
         panic_abort,
+        panic_any,
         panic_bounds_check,
         panic_cannot_unwind,
         panic_const_add_overflow,
@@ -1506,7 +1518,7 @@ symbols! {
         path_main_separator,
         path_to_pathbuf,
         pathbuf_as_path,
-        pattern_complexity,
+        pattern_complexity_limit,
         pattern_parentheses,
         pattern_type,
         pattern_types,
@@ -1565,6 +1577,7 @@ symbols! {
         proc_macro_mod,
         proc_macro_non_items,
         proc_macro_path_invoc,
+        process_abort,
         process_exit,
         profiler_builtins,
         profiler_runtime,
@@ -1671,20 +1684,16 @@ symbols! {
         return_position_impl_trait_in_trait,
         return_type_notation,
         rhs,
-        rintf128,
-        rintf16,
-        rintf32,
-        rintf64,
         riscv_target_feature,
         rlib,
         ropi,
         ropi_rwpi: "ropi-rwpi",
         rotate_left,
         rotate_right,
-        roundevenf128,
-        roundevenf16,
-        roundevenf32,
-        roundevenf64,
+        round_ties_even_f128,
+        round_ties_even_f16,
+        round_ties_even_f32,
+        round_ties_even_f64,
         roundf128,
         roundf16,
         roundf32,
@@ -1755,7 +1764,6 @@ symbols! {
         rustc_insignificant_dtor,
         rustc_intrinsic,
         rustc_intrinsic_const_stable_indirect,
-        rustc_intrinsic_must_be_overridden,
         rustc_layout,
         rustc_layout_scalar_valid_range_end,
         rustc_layout_scalar_valid_range_start,
@@ -1907,7 +1915,7 @@ symbols! {
         simd_shl,
         simd_shr,
         simd_shuffle,
-        simd_shuffle_generic,
+        simd_shuffle_const_generic,
         simd_sub,
         simd_trunc,
         simd_with_exposed_provenance,
@@ -1967,6 +1975,10 @@ symbols! {
         str_from_utf8_mut,
         str_from_utf8_unchecked,
         str_from_utf8_unchecked_mut,
+        str_inherent_from_utf8,
+        str_inherent_from_utf8_mut,
+        str_inherent_from_utf8_unchecked,
+        str_inherent_from_utf8_unchecked_mut,
         str_len,
         str_split_whitespace,
         str_starts_with,
@@ -1991,6 +2003,7 @@ symbols! {
         sub_assign,
         sub_with_overflow,
         suggestion,
+        supertrait_item_shadowing,
         surface_async_drop_in_place,
         sym,
         sync,