4 files changed, 141 insertions, 17 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
index c32593a6d95..bb2cda77dff 100644
--- a/compiler/rustc_span/src/analyze_source_file.rs
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -81,8 +81,8 @@ cfg_select! {
                 // use `loadu`, which supports unaligned loading.
                 let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
 
-                // For character in the chunk, see if its byte value is < 0, which
-                // indicates that it's part of a UTF-8 char.
+                // For each character in the chunk, see if its byte value is < 0,
+                // which indicates that it's part of a UTF-8 char.
                 let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
                 // Create a bit mask from the comparison results.
                 let multibyte_mask = _mm_movemask_epi8(multibyte_test);
@@ -132,8 +132,111 @@ cfg_select! {
             }
         }
     }
+    target_arch = "loongarch64" => {
+        fn analyze_source_file_dispatch(
+            src: &str,
+            lines: &mut Vec<RelativeBytePos>,
+            multi_byte_chars: &mut Vec<MultiByteChar>,
+        ) {
+            use std::arch::is_loongarch_feature_detected;
+
+            if is_loongarch_feature_detected!("lsx") {
+                unsafe {
+                    analyze_source_file_lsx(src, lines, multi_byte_chars);
+                }
+            } else {
+                analyze_source_file_generic(
+                    src,
+                    src.len(),
+                    RelativeBytePos::from_u32(0),
+                    lines,
+                    multi_byte_chars,
+                );
+            }
+        }
+
+        /// Checks 16 byte chunks of text at a time. If the chunk contains
+        /// something other than printable ASCII characters and newlines, the
+        /// function falls back to the generic implementation. Otherwise it uses
+        /// LSX intrinsics to quickly find all newlines.
+        #[target_feature(enable = "lsx")]
+        unsafe fn analyze_source_file_lsx(
+            src: &str,
+            lines: &mut Vec<RelativeBytePos>,
+            multi_byte_chars: &mut Vec<MultiByteChar>,
+        ) {
+            use std::arch::loongarch64::*;
+
+            const CHUNK_SIZE: usize = 16;
+
+            let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
+
+            // This variable keeps track of where we should start decoding a
+            // chunk. If a multi-byte character spans across chunk boundaries,
+            // we need to skip that part in the next chunk because we already
+            // handled it.
+            let mut intra_chunk_offset = 0;
+
+            for (chunk_index, chunk) in chunks.iter().enumerate() {
+                // All LSX memory instructions support unaligned access, so using
+                // vld is fine.
+                let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };
+
+                // For each character in the chunk, see if its byte value is < 0,
+                // which indicates that it's part of a UTF-8 char.
+                let multibyte_mask = lsx_vmskltz_b(chunk);
+                // Create a bit mask from the comparison results.
+                let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);
+
+                // If the bit mask is all zero, we only have ASCII chars here:
+                if multibyte_mask == 0 {
+                    assert!(intra_chunk_offset == 0);
+
+                    // Check for newlines in the chunk
+                    let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);
+                    let newlines_mask = lsx_vmskltz_b(newlines_test);
+                    let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);
+
+                    let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
+
+                    while newlines_mask != 0 {
+                        let index = newlines_mask.trailing_zeros();
+
+                        lines.push(RelativeBytePos(index) + output_offset);
+
+                        // Clear the bit, so we can find the next one.
+                        newlines_mask &= newlines_mask - 1;
+                    }
+                } else {
+                    // The slow path.
+                    // There are multibyte chars in here, fallback to generic decoding.
+                    let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
+                    intra_chunk_offset = analyze_source_file_generic(
+                        &src[scan_start..],
+                        CHUNK_SIZE - intra_chunk_offset,
+                        RelativeBytePos::from_usize(scan_start),
+                        lines,
+                        multi_byte_chars,
+                    );
+                }
+            }
+
+            // There might still be a tail left to analyze
+            let tail_start = src.len() - tail.len() + intra_chunk_offset;
+            if tail_start < src.len() {
+                analyze_source_file_generic(
+                    &src[tail_start..],
+                    src.len() - tail_start,
+                    RelativeBytePos::from_usize(tail_start),
+                    lines,
+                    multi_byte_chars,
+                );
+            }
+        }
+    }
     _ => {
-        // The target (or compiler version) does not support SSE2 ...
+        // The target (or compiler version) does not support vector instructions
+        // our specialized implementations need (x86 SSE2, loongarch64 LSX)...
         fn analyze_source_file_dispatch(
             src: &str,
             lines: &mut Vec<RelativeBytePos>,
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs
index 19494ffc37e..633cbc3a4ae 100644
--- a/compiler/rustc_span/src/hygiene.rs
+++ b/compiler/rustc_span/src/hygiene.rs
@@ -46,6 +46,8 @@ use crate::symbol::{Symbol, kw, sym};
 use crate::{DUMMY_SP, HashStableContext, Span, SpanDecoder, SpanEncoder, with_session_globals};
 
 /// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks".
+///
+/// See <https://rustc-dev-guide.rust-lang.org/macro-expansion.html> for more explanation.
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub struct SyntaxContext(u32);
 
@@ -61,7 +63,10 @@ pub type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
 
 #[derive(Clone, Copy, Debug)]
 struct SyntaxContextData {
+    /// The last macro expansion in the chain.
+    /// (Here we say the most deeply nested macro expansion is the "outermost" expansion.)
     outer_expn: ExpnId,
+    /// Transparency of the last macro expansion
     outer_transparency: Transparency,
     parent: SyntaxContext,
     /// This context, but with all transparent and semi-opaque expansions filtered away.
@@ -450,11 +455,13 @@ impl HygieneData {
         self.syntax_context_data[ctxt.0 as usize].opaque_and_semiopaque
     }
 
+    /// See [`SyntaxContextData::outer_expn`]
     #[inline]
     fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
         self.syntax_context_data[ctxt.0 as usize].outer_expn
     }
 
+    /// The last macro expansion and its Transparency
     #[inline]
     fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
         let data = &self.syntax_context_data[ctxt.0 as usize];
@@ -900,6 +907,7 @@ impl SyntaxContext {
         HygieneData::with(|data| data.normalize_to_macro_rules(self))
     }
 
+    /// See [`SyntaxContextData::outer_expn`]
     #[inline]
     pub fn outer_expn(self) -> ExpnId {
         HygieneData::with(|data| data.outer_expn(self))
@@ -912,6 +920,7 @@ impl SyntaxContext {
         HygieneData::with(|data| data.expn_data(data.outer_expn(self)).clone())
     }
 
+    /// See [`HygieneData::outer_mark`]
     #[inline]
     fn outer_mark(self) -> (ExpnId, Transparency) {
         HygieneData::with(|data| data.outer_mark(self))
@@ -982,19 +991,20 @@ impl Span {
 #[derive(Clone, Debug, Encodable, Decodable, HashStable_Generic)]
 pub struct ExpnData {
     // --- The part unique to each expansion.
-    /// The kind of this expansion - macro or compiler desugaring.
     pub kind: ExpnKind,
-    /// The expansion that produced this expansion.
+    /// The expansion that contains the definition of the macro for this expansion.
     pub parent: ExpnId,
-    /// The location of the actual macro invocation or syntax sugar , e.g.
-    /// `let x = foo!();` or `if let Some(y) = x {}`
+    /// The span of the macro call which produced this expansion.
+    ///
+    /// This span will typically have a different `ExpnData` and `call_site`.
+    /// This recursively traces back through any macro calls which expanded into further
+    /// macro calls, until the "source call-site" is reached at the root SyntaxContext.
+    /// For example, if `food!()` expands to `fruit!()` which then expands to `grape`,
+    /// then the call-site of `grape` is `fruit!()` and the call-site of `fruit!()`
+    /// is `food!()`.
     ///
-    /// This may recursively refer to other macro invocations, e.g., if
-    /// `foo!()` invoked `bar!()` internally, and there was an
-    /// expression inside `bar!`; the call_site of the expression in
-    /// the expansion would point to the `bar!` invocation; that
-    /// call_site span would have its own ExpnData, with the call_site
-    /// pointing to the `foo!` invocation.
+    /// For a desugaring expansion, this is the span of the expression or node that was
+    /// desugared.
     pub call_site: Span,
     /// Used to force two `ExpnData`s to have different `Fingerprint`s.
     /// Due to macro expansion, it's possible to end up with two `ExpnId`s
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index ae6755f0764..ededbea57e9 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -17,7 +17,7 @@
 
 // tidy-alphabetical-start
 #![allow(internal_features)]
-#![cfg_attr(bootstrap, feature(round_char_boundary))]
+#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
 #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
 #![doc(rust_logo)]
 #![feature(array_windows)]
@@ -710,8 +710,8 @@ impl Span {
         if !ctxt.is_root() { ctxt.outer_expn_data().call_site.source_callsite() } else { self }
     }
 
-    /// The `Span` for the tokens in the previous macro expansion from which `self` was generated,
-    /// if any.
+    /// Returns the call-site span of the last macro expansion which produced this `Span`.
+    /// (see [`ExpnData::call_site`]). Returns `None` if this is not an expansion.
     pub fn parent_callsite(self) -> Option<Span> {
         let ctxt = self.ctxt();
         (!ctxt.is_root()).then(|| ctxt.outer_expn_data().call_site)
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index cdb0b5b58da..b34a64108e3 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -545,6 +545,7 @@ symbols! {
         attributes,
         audit_that,
         augmented_assignments,
+        auto_cfg,
         auto_traits,
         autodiff,
         autodiff_forward,
@@ -628,7 +629,6 @@ symbols! {
         cfg_emscripten_wasm_eh,
         cfg_eval,
         cfg_fmt_debug,
-        cfg_hide,
         cfg_overflow_checks,
         cfg_panic,
         cfg_relocation_model,
@@ -679,6 +679,7 @@ symbols! {
         cmpxchg16b_target_feature,
         cmse_nonsecure_entry,
         coerce_pointee_validated,
+        coerce_shared,
         coerce_unsized,
         cold,
         cold_path,
@@ -794,6 +795,7 @@ symbols! {
         ctlz,
         ctlz_nonzero,
         ctpop,
+        ctr,
         cttz,
         cttz_nonzero,
         custom_attribute,
@@ -938,6 +940,7 @@ symbols! {
         ermsb_target_feature,
         exact_div,
         except,
+        exception_handling: "exception-handling",
         exchange_malloc,
         exclusive_range_pattern,
         exhaustive_integer_patterns,
@@ -1148,6 +1151,7 @@ symbols! {
         hashset_iter_ty,
         hexagon_target_feature,
         hidden,
+        hide,
         hint,
         homogeneous_aggregate,
         host,
@@ -1195,6 +1199,7 @@ symbols! {
         if_let_rescope,
         if_while_or_patterns,
         ignore,
+        immediate_abort: "immediate-abort",
         impl_header_lifetime_elision,
         impl_lint_pass,
         impl_trait_in_assoc_type,
@@ -1333,6 +1338,7 @@ symbols! {
         loongarch_target_feature,
         loop_break_value,
         loop_match,
+        lr,
         lt,
         m68k_target_feature,
         macro_at_most_once_rep,
@@ -1413,6 +1419,7 @@ symbols! {
         mir_call,
         mir_cast_ptr_to_ptr,
         mir_cast_transmute,
+        mir_cast_unsize,
         mir_checked,
         mir_copy_for_deref,
         mir_debuginfo,
@@ -1917,6 +1924,8 @@ symbols! {
         rustc_no_mir_inline,
         rustc_nonnull_optimization_guaranteed,
         rustc_nounwind,
+        rustc_objc_class,
+        rustc_objc_selector,
         rustc_object_lifetime_default,
         rustc_on_unimplemented,
         rustc_outlives,
@@ -1937,6 +1946,7 @@ symbols! {
         rustc_regions,
         rustc_reservation_impl,
         rustc_serialize,
+        rustc_simd_monomorphize_lane_limit,
         rustc_skip_during_method_dispatch,
         rustc_specialization_trait,
         rustc_std_internal_symbol,
@@ -1980,6 +1990,7 @@ symbols! {
         shl_assign,
         shorter_tail_lifetimes,
         should_panic,
+        show,
         shr,
         shr_assign,
         sig_dfl,