Adjust syntax

author: Caio <c410.f3r@gmail.com> 2024-12-22 17:12:42 -0300
committer: Caio <c410.f3r@gmail.com> 2024-12-22 17:12:42 -0300
commit: c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1 (patch)
tree: 145aaea6a93c0d9f968ffa87fafb9f4d0ba2f849 /compiler/rustc_span/src/analyze_source_file.rs
parent: 426d1734238e3c5f52e935ba4f617f3a9f43b59d (diff)
download: rust-c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1.tar.gz
rust-c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1.zip
1 files changed, 160 insertions, 0 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
index 28ce883daee..fba20566580 100644
--- a/compiler/rustc_span/src/analyze_source_file.rs
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -29,6 +29,7 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi
     (lines, multi_byte_chars)
 }
 
+#[cfg(bootstrap)]
 cfg_match! {
     cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
         fn analyze_source_file_dispatch(
@@ -185,6 +186,165 @@ cfg_match! {
         }
     }
 }
+
+#[cfg(not(bootstrap))]
+cfg_match! {
+    any(target_arch = "x86", target_arch = "x86_64") => {
+        fn analyze_source_file_dispatch(
+            src: &str,
+            lines: &mut Vec<RelativeBytePos>,
+            multi_byte_chars: &mut Vec<MultiByteChar>,
+        ) {
+            if is_x86_feature_detected!("sse2") {
+                unsafe {
+                    analyze_source_file_sse2(src, lines, multi_byte_chars);
+                }
+            } else {
+                analyze_source_file_generic(
+                    src,
+                    src.len(),
+                    RelativeBytePos::from_u32(0),
+                    lines,
+                    multi_byte_chars,
+                );
+            }
+        }
+
+        /// Checks 16 byte chunks of text at a time. If the chunk contains
+        /// something other than printable ASCII characters and newlines, the
+        /// function falls back to the generic implementation. Otherwise it uses
+        /// SSE2 intrinsics to quickly find all newlines.
+        #[target_feature(enable = "sse2")]
+        unsafe fn analyze_source_file_sse2(
+            src: &str,
+            lines: &mut Vec<RelativeBytePos>,
+            multi_byte_chars: &mut Vec<MultiByteChar>,
+        ) {
+            #[cfg(target_arch = "x86")]
+            use std::arch::x86::*;
+            #[cfg(target_arch = "x86_64")]
+            use std::arch::x86_64::*;
+
+            const CHUNK_SIZE: usize = 16;
+
+            let src_bytes = src.as_bytes();
+
+            let chunk_count = src.len() / CHUNK_SIZE;
+
+            // This variable keeps track of where we should start decoding a
+            // chunk. If a multi-byte character spans across chunk boundaries,
+            // we need to skip that part in the next chunk because we already
+            // handled it.
+            let mut intra_chunk_offset = 0;
+
+            for chunk_index in 0..chunk_count {
+                let ptr = src_bytes.as_ptr() as *const __m128i;
+                // We don't know if the pointer is aligned to 16 bytes, so we
+                // use `loadu`, which supports unaligned loading.
+                let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
+
+                // For character in the chunk, see if its byte value is < 0, which
+                // indicates that it's part of a UTF-8 char.
+                let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
+                // Create a bit mask from the comparison results.
+                let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
+
+                // If the bit mask is all zero, we only have ASCII chars here:
+                if multibyte_mask == 0 {
+                    assert!(intra_chunk_offset == 0);
+
+                    // Check if there are any control characters in the chunk. All
+                    // control characters that we can encounter at this point have a
+                    // byte value less than 32 or ...
+                    let control_char_test0 = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)) };
+                    let control_char_mask0 = unsafe { _mm_movemask_epi8(control_char_test0) };
+
+                    // ... it's the ASCII 'DEL' character with a value of 127.
+                    let control_char_test1 = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)) };
+                    let control_char_mask1 = unsafe { _mm_movemask_epi8(control_char_test1) };
+
+                    let control_char_mask = control_char_mask0 | control_char_mask1;
+
+                    if control_char_mask != 0 {
+                        // Check for newlines in the chunk
+                        let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
+                        let newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
+
+                        if control_char_mask == newlines_mask {
+                            // All control characters are newlines, record them
+                            let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
+                            let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
+
+                            loop {
+                                let index = newlines_mask.trailing_zeros();
+
+                                if index >= CHUNK_SIZE as u32 {
+                                    // We have arrived at the end of the chunk.
+                                    break;
+                                }
+
+                                lines.push(RelativeBytePos(index) + output_offset);
+
+                                // Clear the bit, so we can find the next one.
+                                newlines_mask &= (!1) << index;
+                            }
+
+                            // We are done for this chunk. All control characters were
+                            // newlines and we took care of those.
+                            continue;
+                        } else {
+                            // Some of the control characters are not newlines,
+                            // fall through to the slow path below.
+                        }
+                    } else {
+                        // No control characters, nothing to record for this chunk
+                        continue;
+                    }
+                }
+
+                // The slow path.
+                // There are control chars in here, fallback to generic decoding.
+                let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
+                intra_chunk_offset = analyze_source_file_generic(
+                    &src[scan_start..],
+                    CHUNK_SIZE - intra_chunk_offset,
+                    RelativeBytePos::from_usize(scan_start),
+                    lines,
+                    multi_byte_chars,
+                );
+            }
+
+            // There might still be a tail left to analyze
+            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+            if tail_start < src.len() {
+                analyze_source_file_generic(
+                    &src[tail_start..],
+                    src.len() - tail_start,
+                    RelativeBytePos::from_usize(tail_start),
+                    lines,
+                    multi_byte_chars,
+                );
+            }
+        }
+    }
+    _ => {
+        // The target (or compiler version) does not support SSE2 ...
+        fn analyze_source_file_dispatch(
+            src: &str,
+            lines: &mut Vec<RelativeBytePos>,
+            multi_byte_chars: &mut Vec<MultiByteChar>,
+        ) {
+            analyze_source_file_generic(
+                src,
+                src.len(),
+                RelativeBytePos::from_u32(0),
+                lines,
+                multi_byte_chars,
+            );
+        }
+    }
+}
+
 // `scan_len` determines the number of bytes in `src` to scan. Note that the
 // function can read past `scan_len` if a multi-byte character start within the
 // range but extends past it. The overflow is returned by the function.
author	Caio <c410.f3r@gmail.com>	2024-12-22 17:12:42 -0300
committer	Caio <c410.f3r@gmail.com>	2024-12-22 17:12:42 -0300
commit	c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1 (patch)
tree	145aaea6a93c0d9f968ffa87fafb9f4d0ba2f849 /compiler/rustc_span/src/analyze_source_file.rs
parent	426d1734238e3c5f52e935ba4f617f3a9f43b59d (diff)
download	rust-c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1.tar.gz rust-c89f0dc01d08cbc79a7bcb8a308585fb1a6544f1.zip