diff options
| author | bors <bors@rust-lang.org> | 2025-09-28 16:36:14 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-09-28 16:36:14 +0000 |
| commit | c8905eaa66e0c35a33626e974b9ce6955c739b5b (patch) | |
| tree | 52dd4bdf4bb5cfeab181f05a308c3bbd6d1959da /compiler/rustc_span/src/analyze_source_file.rs | |
| parent | 8d72d3e1e96f58ca10059a6bb6e8aecba4a0e9cd (diff) | |
| parent | 4eb6b8f43f37b612db58b9cd1682ee7256fb0c43 (diff) | |
| download | rust-c8905eaa66e0c35a33626e974b9ce6955c739b5b.tar.gz rust-c8905eaa66e0c35a33626e974b9ce6955c739b5b.zip | |
Auto merge of #147128 - matthiaskrgr:rollup-mqey4c4, r=matthiaskrgr
Rollup of 6 pull requests Successful merges: - rust-lang/rust#140482 (std::net: update tcp deferaccept delay type to Duration.) - rust-lang/rust#141469 (Allow `&raw [mut | const]` for union field in safe code) - rust-lang/rust#144197 (TypeTree support in autodiff) - rust-lang/rust#146675 (Allow shared access to `Exclusive<T>` when `T: Sync`) - rust-lang/rust#147113 (Reland "Add LSX accelerated implementation for source file analysis") - rust-lang/rust#147120 (Fix --extra-checks=spellcheck to prevent cargo install every time) r? `@ghost` `@rustbot` modify labels: rollup
Diffstat (limited to 'compiler/rustc_span/src/analyze_source_file.rs')
| -rw-r--r-- | compiler/rustc_span/src/analyze_source_file.rs | 109 |
1 files changed, 106 insertions, 3 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index c32593a6d95..bb2cda77dff 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -81,8 +81,8 @@ cfg_select! { // use `loadu`, which supports unaligned loading. let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) }; - // For character in the chunk, see if its byte value is < 0, which - // indicates that it's part of a UTF-8 char. + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); // Create a bit mask from the comparison results. let multibyte_mask = _mm_movemask_epi8(multibyte_test); @@ -132,8 +132,111 @@ cfg_select! { } } } + target_arch = "loongarch64" => { + fn analyze_source_file_dispatch( + src: &str, + lines: &mut Vec<RelativeBytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + ) { + use std::arch::is_loongarch_feature_detected; + + if is_loongarch_feature_detected!("lsx") { + unsafe { + analyze_source_file_lsx(src, lines, multi_byte_chars); + } + } else { + analyze_source_file_generic( + src, + src.len(), + RelativeBytePos::from_u32(0), + lines, + multi_byte_chars, + ); + } + } + + /// Checks 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// LSX intrinsics to quickly find all newlines. + #[target_feature(enable = "lsx")] + unsafe fn analyze_source_file_lsx( + src: &str, + lines: &mut Vec<RelativeBytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + ) { + use std::arch::loongarch64::*; + + const CHUNK_SIZE: usize = 16; + + let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>(); + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for (chunk_index, chunk) in chunks.iter().enumerate() { + // All LSX memory instructions support unaligned access, so using + // vld is fine. + let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) }; + + // For each character in the chunk, see if its byte value is < 0, + // which indicates that it's part of a UTF-8 char. + let multibyte_mask = lsx_vmskltz_b(chunk); + // Create a bit mask from the comparison results. + let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check for newlines in the chunk + let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk); + let newlines_mask = lsx_vmskltz_b(newlines_test); + let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask); + + let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + while newlines_mask != 0 { + let index = newlines_mask.trailing_zeros(); + + lines.push(RelativeBytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= newlines_mask - 1; + } + } else { + // The slow path. + // There are multibyte chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_source_file_generic( + &src[scan_start..], + CHUNK_SIZE - intra_chunk_offset, + RelativeBytePos::from_usize(scan_start), + lines, + multi_byte_chars, + ); + } + } + + // There might still be a tail left to analyze + let tail_start = src.len() - tail.len() + intra_chunk_offset; + if tail_start < src.len() { + analyze_source_file_generic( + &src[tail_start..], + src.len() - tail_start, + RelativeBytePos::from_usize(tail_start), + lines, + multi_byte_chars, + ); + } + } + } _ => { - // The target (or compiler version) does not support SSE2 ... + // The target (or compiler version) does not support vector instructions + // our specialized implementations need (x86 SSE2, loongarch64 LSX)... fn analyze_source_file_dispatch( src: &str, lines: &mut Vec<RelativeBytePos>, |
