diff options
Diffstat (limited to 'compiler/rustc_span')
| -rw-r--r-- | compiler/rustc_span/src/analyze_source_file.rs | 109 | ||||
| -rw-r--r-- | compiler/rustc_span/src/hygiene.rs | 30 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 13 |
4 files changed, 141 insertions, 17 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index c32593a6d95..bb2cda77dff 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -81,8 +81,8 @@ cfg_select! { // use `loadu`, which supports unaligned loading. let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) }; - // For character in the chunk, see if its byte value is < 0, which - // indicates that it's part of a UTF-8 char. + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); // Create a bit mask from the comparison results. let multibyte_mask = _mm_movemask_epi8(multibyte_test); @@ -132,8 +132,111 @@ cfg_select! { } } } + target_arch = "loongarch64" => { + fn analyze_source_file_dispatch( + src: &str, + lines: &mut Vec<RelativeBytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + ) { + use std::arch::is_loongarch_feature_detected; + + if is_loongarch_feature_detected!("lsx") { + unsafe { + analyze_source_file_lsx(src, lines, multi_byte_chars); + } + } else { + analyze_source_file_generic( + src, + src.len(), + RelativeBytePos::from_u32(0), + lines, + multi_byte_chars, + ); + } + } + + /// Checks 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// LSX intrinsics to quickly find all newlines. + #[target_feature(enable = "lsx")] + unsafe fn analyze_source_file_lsx( + src: &str, + lines: &mut Vec<RelativeBytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + ) { + use std::arch::loongarch64::*; + + const CHUNK_SIZE: usize = 16; + + let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>(); + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for (chunk_index, chunk) in chunks.iter().enumerate() { + // All LSX memory instructions support unaligned access, so using + // vld is fine. + let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) }; + + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. + let multibyte_mask = lsx_vmskltz_b(chunk); + // Create a bit mask from the comparison results. + let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check for newlines in the chunk + let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk); + let newlines_mask = lsx_vmskltz_b(newlines_test); + let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask); + + let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + while newlines_mask != 0 { + let index = newlines_mask.trailing_zeros(); + + lines.push(RelativeBytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= newlines_mask - 1; + } + } else { + // The slow path. + // There are multibyte chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_source_file_generic( + &src[scan_start..], + CHUNK_SIZE - intra_chunk_offset, + RelativeBytePos::from_usize(scan_start), + lines, + multi_byte_chars, + ); + } + } + + // There might still be a tail left to analyze + let tail_start = src.len() - tail.len() + intra_chunk_offset; + if tail_start < src.len() { + analyze_source_file_generic( + &src[tail_start..], + src.len() - tail_start, + RelativeBytePos::from_usize(tail_start), + lines, + multi_byte_chars, + ); + } + } + } _ => { - // The target (or compiler version) does not support SSE2 ... + // The target (or compiler version) does not support vector instructions + // our specialized implementations need (x86 SSE2, loongarch64 LSX)... fn analyze_source_file_dispatch( src: &str, lines: &mut Vec<RelativeBytePos>, diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 19494ffc37e..633cbc3a4ae 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -46,6 +46,8 @@ use crate::symbol::{Symbol, kw, sym}; use crate::{DUMMY_SP, HashStableContext, Span, SpanDecoder, SpanEncoder, with_session_globals}; /// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks". +/// +/// See <https://rustc-dev-guide.rust-lang.org/macro-expansion.html> for more explanation. #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct SyntaxContext(u32); @@ -61,7 +63,10 @@ pub type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency); #[derive(Clone, Copy, Debug)] struct SyntaxContextData { + /// The last macro expansion in the chain. + /// (Here we say the most deeply nested macro expansion is the "outermost" expansion.) outer_expn: ExpnId, + /// Transparency of the last macro expansion outer_transparency: Transparency, parent: SyntaxContext, /// This context, but with all transparent and semi-opaque expansions filtered away. @@ -450,11 +455,13 @@ impl HygieneData { self.syntax_context_data[ctxt.0 as usize].opaque_and_semiopaque } + /// See [`SyntaxContextData::outer_expn`] #[inline] fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId { self.syntax_context_data[ctxt.0 as usize].outer_expn } + /// The last macro expansion and its Transparency #[inline] fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) { let data = &self.syntax_context_data[ctxt.0 as usize]; @@ -900,6 +907,7 @@ impl SyntaxContext { HygieneData::with(|data| data.normalize_to_macro_rules(self)) } + /// See [`SyntaxContextData::outer_expn`] #[inline] pub fn outer_expn(self) -> ExpnId { HygieneData::with(|data| data.outer_expn(self)) @@ -912,6 +920,7 @@ impl SyntaxContext { HygieneData::with(|data| data.expn_data(data.outer_expn(self)).clone()) } + /// See [`HygieneData::outer_mark`] #[inline] fn outer_mark(self) -> (ExpnId, Transparency) { HygieneData::with(|data| data.outer_mark(self)) @@ -982,19 +991,20 @@ impl Span { #[derive(Clone, Debug, Encodable, Decodable, HashStable_Generic)] pub struct ExpnData { // --- The part unique to each expansion. - /// The kind of this expansion - macro or compiler desugaring. pub kind: ExpnKind, - /// The expansion that produced this expansion. + /// The expansion that contains the definition of the macro for this expansion. pub parent: ExpnId, - /// The location of the actual macro invocation or syntax sugar , e.g. - /// `let x = foo!();` or `if let Some(y) = x {}` + /// The span of the macro call which produced this expansion. + /// + /// This span will typically have a different `ExpnData` and `call_site`. + /// This recursively traces back through any macro calls which expanded into further + /// macro calls, until the "source call-site" is reached at the root SyntaxContext. + /// For example, if `food!()` expands to `fruit!()` which then expands to `grape`, + /// then the call-site of `grape` is `fruit!()` and the call-site of `fruit!()` + /// is `food!()`. /// - /// This may recursively refer to other macro invocations, e.g., if - /// `foo!()` invoked `bar!()` internally, and there was an - /// expression inside `bar!`; the call_site of the expression in - /// the expansion would point to the `bar!` invocation; that - /// call_site span would have its own ExpnData, with the call_site - /// pointing to the `foo!` invocation. + /// For a desugaring expansion, this is the span of the expression or node that was + /// desugared. pub call_site: Span, /// Used to force two `ExpnData`s to have different `Fingerprint`s. /// Due to macro expansion, it's possible to end up with two `ExpnId`s diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index ae6755f0764..ededbea57e9 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -17,7 +17,7 @@ // tidy-alphabetical-start #![allow(internal_features)] -#![cfg_attr(bootstrap, feature(round_char_boundary))] +#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))] #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![doc(rust_logo)] #![feature(array_windows)] @@ -710,8 +710,8 @@ impl Span { if !ctxt.is_root() { ctxt.outer_expn_data().call_site.source_callsite() } else { self } } - /// The `Span` for the tokens in the previous macro expansion from which `self` was generated, - /// if any. + /// Returns the call-site span of the last macro expansion which produced this `Span`. + /// (see [`ExpnData::call_site`]). Returns `None` if this is not an expansion. pub fn parent_callsite(self) -> Option<Span> { let ctxt = self.ctxt(); (!ctxt.is_root()).then(|| ctxt.outer_expn_data().call_site) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index cdb0b5b58da..b34a64108e3 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -545,6 +545,7 @@ symbols! { attributes, audit_that, augmented_assignments, + auto_cfg, auto_traits, autodiff, autodiff_forward, @@ -628,7 +629,6 @@ symbols! { cfg_emscripten_wasm_eh, cfg_eval, cfg_fmt_debug, - cfg_hide, cfg_overflow_checks, cfg_panic, cfg_relocation_model, @@ -679,6 +679,7 @@ symbols! { cmpxchg16b_target_feature, cmse_nonsecure_entry, coerce_pointee_validated, + coerce_shared, coerce_unsized, cold, cold_path, @@ -794,6 +795,7 @@ symbols! { ctlz, ctlz_nonzero, ctpop, + ctr, cttz, cttz_nonzero, custom_attribute, @@ -938,6 +940,7 @@ symbols! { ermsb_target_feature, exact_div, except, + exception_handling: "exception-handling", exchange_malloc, exclusive_range_pattern, exhaustive_integer_patterns, @@ -1148,6 +1151,7 @@ symbols! { hashset_iter_ty, hexagon_target_feature, hidden, + hide, hint, homogeneous_aggregate, host, @@ -1195,6 +1199,7 @@ symbols! { if_let_rescope, if_while_or_patterns, ignore, + immediate_abort: "immediate-abort", impl_header_lifetime_elision, impl_lint_pass, impl_trait_in_assoc_type, @@ -1333,6 +1338,7 @@ symbols! { loongarch_target_feature, loop_break_value, loop_match, + lr, lt, m68k_target_feature, macro_at_most_once_rep, @@ -1413,6 +1419,7 @@ symbols! { mir_call, mir_cast_ptr_to_ptr, mir_cast_transmute, + mir_cast_unsize, mir_checked, mir_copy_for_deref, mir_debuginfo, @@ -1917,6 +1924,8 @@ symbols! { rustc_no_mir_inline, rustc_nonnull_optimization_guaranteed, rustc_nounwind, + rustc_objc_class, + rustc_objc_selector, rustc_object_lifetime_default, rustc_on_unimplemented, rustc_outlives, @@ -1937,6 +1946,7 @@ symbols! { rustc_regions, rustc_reservation_impl, rustc_serialize, + rustc_simd_monomorphize_lane_limit, rustc_skip_during_method_dispatch, rustc_specialization_trait, rustc_std_internal_symbol, @@ -1980,6 +1990,7 @@ symbols! { shl_assign, shorter_tail_lifetimes, should_panic, + show, shr, shr_assign, sig_dfl, |
