diff options
| author | Camille GILLOT <gillot.camille@gmail.com> | 2023-09-03 10:15:35 +0000 |
|---|---|---|
| committer | Camille GILLOT <gillot.camille@gmail.com> | 2023-09-03 12:56:10 +0000 |
| commit | 258ace613da6b8c90ba4995738cb13791388c4bb (patch) | |
| tree | 86baeb6af697ff1ce06e94ef72305a8bd5b8029c /compiler/rustc_span/src | |
| parent | 585bb5e68d85c1e4f32da3e2499343372626a436 (diff) | |
| download | rust-258ace613da6b8c90ba4995738cb13791388c4bb.tar.gz rust-258ace613da6b8c90ba4995738cb13791388c4bb.zip | |
Use relative positions inside a SourceFile.
Diffstat (limited to 'compiler/rustc_span/src')
| -rw-r--r-- | compiler/rustc_span/src/analyze_source_file.rs | 48 | ||||
| -rw-r--r-- | compiler/rustc_span/src/analyze_source_file/tests.rs | 29 | ||||
| -rw-r--r-- | compiler/rustc_span/src/caching_source_map_view.rs | 15 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 164 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map.rs | 72 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map/tests.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_span/src/tests.rs | 28 |
7 files changed, 167 insertions, 196 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs index 26cd54210d0..450d5455ff9 100644 --- a/compiler/rustc_span/src/analyze_source_file.rs +++ b/compiler/rustc_span/src/analyze_source_file.rs @@ -11,26 +11,19 @@ mod tests; /// is detected at runtime. pub fn analyze_source_file( src: &str, - source_file_start_pos: BytePos, -) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) { - let mut lines = vec![source_file_start_pos]; +) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) { + let mut lines = vec![RelativeBytePos::from_u32(0)]; let mut multi_byte_chars = vec![]; let mut non_narrow_chars = vec![]; // Calls the right implementation, depending on hardware support available. - analyze_source_file_dispatch( - src, - source_file_start_pos, - &mut lines, - &mut multi_byte_chars, - &mut non_narrow_chars, - ); + analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars, &mut non_narrow_chars); // The code above optimistically registers a new line *after* each \n // it encounters. If that point is already outside the source_file, remove // it again. if let Some(&last_line_start) = lines.last() { - let source_file_end = source_file_start_pos + BytePos::from_usize(src.len()); + let source_file_end = RelativeBytePos::from_usize(src.len()); assert!(source_file_end >= last_line_start); if last_line_start == source_file_end { lines.pop(); @@ -43,14 +36,12 @@ pub fn analyze_source_file( cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { fn analyze_source_file_dispatch(src: &str, - source_file_start_pos: BytePos, - lines: &mut Vec<BytePos>, + lines: &mut Vec<RelativeBytePos>, multi_byte_chars: &mut Vec<MultiByteChar>, non_narrow_chars: &mut Vec<NonNarrowChar>) { if is_x86_feature_detected!("sse2") { unsafe { analyze_source_file_sse2(src, - source_file_start_pos, lines, multi_byte_chars, non_narrow_chars); @@ -58,7 +49,7 @@ cfg_if::cfg_if! { } else { analyze_source_file_generic(src, src.len(), - source_file_start_pos, + RelativeBytePos::from_u32(0), lines, multi_byte_chars, non_narrow_chars); @@ -72,8 +63,7 @@ cfg_if::cfg_if! { /// SSE2 intrinsics to quickly find all newlines. #[target_feature(enable = "sse2")] unsafe fn analyze_source_file_sse2(src: &str, - output_offset: BytePos, - lines: &mut Vec<BytePos>, + lines: &mut Vec<RelativeBytePos>, multi_byte_chars: &mut Vec<MultiByteChar>, non_narrow_chars: &mut Vec<NonNarrowChar>) { #[cfg(target_arch = "x86")] @@ -129,8 +119,7 @@ cfg_if::cfg_if! { if control_char_mask == newlines_mask { // All control characters are newlines, record them let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32; - let output_offset = output_offset + - BytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1); loop { let index = newlines_mask.trailing_zeros(); @@ -140,7 +129,7 @@ cfg_if::cfg_if! { break } - lines.push(BytePos(index) + output_offset); + lines.push(RelativeBytePos(index) + output_offset); // Clear the bit, so we can find the next one. newlines_mask &= (!1) << index; @@ -165,7 +154,7 @@ cfg_if::cfg_if! { intra_chunk_offset = analyze_source_file_generic( &src[scan_start .. ], CHUNK_SIZE - intra_chunk_offset, - BytePos::from_usize(scan_start) + output_offset, + RelativeBytePos::from_usize(scan_start), lines, multi_byte_chars, non_narrow_chars @@ -177,7 +166,7 @@ cfg_if::cfg_if! { if tail_start < src.len() { analyze_source_file_generic(&src[tail_start ..], src.len() - tail_start, - output_offset + BytePos::from_usize(tail_start), + RelativeBytePos::from_usize(tail_start), lines, multi_byte_chars, non_narrow_chars); @@ -187,13 +176,12 @@ cfg_if::cfg_if! { // The target (or compiler version) does not support SSE2 ... fn analyze_source_file_dispatch(src: &str, - source_file_start_pos: BytePos, - lines: &mut Vec<BytePos>, + lines: &mut Vec<RelativeBytePos>, multi_byte_chars: &mut Vec<MultiByteChar>, non_narrow_chars: &mut Vec<NonNarrowChar>) { analyze_source_file_generic(src, src.len(), - source_file_start_pos, + RelativeBytePos::from_u32(0), lines, multi_byte_chars, non_narrow_chars); @@ -207,8 +195,8 @@ cfg_if::cfg_if! { fn analyze_source_file_generic( src: &str, scan_len: usize, - output_offset: BytePos, - lines: &mut Vec<BytePos>, + output_offset: RelativeBytePos, + lines: &mut Vec<RelativeBytePos>, multi_byte_chars: &mut Vec<MultiByteChar>, non_narrow_chars: &mut Vec<NonNarrowChar>, ) -> usize { @@ -230,11 +218,11 @@ fn analyze_source_file_generic( // This is an ASCII control character, it could be one of the cases // that are interesting to us. - let pos = BytePos::from_usize(i) + output_offset; + let pos = RelativeBytePos::from_usize(i) + output_offset; match byte { b'\n' => { - lines.push(pos + BytePos(1)); + lines.push(pos + RelativeBytePos(1)); } b'\t' => { non_narrow_chars.push(NonNarrowChar::Tab(pos)); @@ -250,7 +238,7 @@ fn analyze_source_file_generic( let c = src[i..].chars().next().unwrap(); char_len = c.len_utf8(); - let pos = BytePos::from_usize(i) + output_offset; + let pos = RelativeBytePos::from_usize(i) + output_offset; if char_len > 1 { assert!((2..=4).contains(&char_len)); diff --git a/compiler/rustc_span/src/analyze_source_file/tests.rs b/compiler/rustc_span/src/analyze_source_file/tests.rs index 66aefc9a787..0c77d080c17 100644 --- a/compiler/rustc_span/src/analyze_source_file/tests.rs +++ b/compiler/rustc_span/src/analyze_source_file/tests.rs @@ -3,29 +3,28 @@ use super::*; macro_rules! test { (case: $test_name:ident, text: $text:expr, - source_file_start_pos: $source_file_start_pos:expr, lines: $lines:expr, multi_byte_chars: $multi_byte_chars:expr, non_narrow_chars: $non_narrow_chars:expr,) => { #[test] fn $test_name() { - let (lines, multi_byte_chars, non_narrow_chars) = - analyze_source_file($text, BytePos($source_file_start_pos)); + let (lines, multi_byte_chars, non_narrow_chars) = analyze_source_file($text); - let expected_lines: Vec<BytePos> = $lines.into_iter().map(BytePos).collect(); + let expected_lines: Vec<RelativeBytePos> = + $lines.into_iter().map(RelativeBytePos).collect(); assert_eq!(lines, expected_lines); let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars .into_iter() - .map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes }) + .map(|(pos, bytes)| MultiByteChar { pos: RelativeBytePos(pos), bytes }) .collect(); assert_eq!(multi_byte_chars, expected_mbcs); let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars .into_iter() - .map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width)) + .map(|(pos, width)| NonNarrowChar::new(RelativeBytePos(pos), width)) .collect(); assert_eq!(non_narrow_chars, expected_nncs); @@ -36,7 +35,6 @@ macro_rules! test { test!( case: empty_text, text: "", - source_file_start_pos: 0, lines: vec![], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -45,7 +43,6 @@ test!( test!( case: newlines_short, text: "a\nc", - source_file_start_pos: 0, lines: vec![0, 2], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -54,7 +51,6 @@ test!( test!( case: newlines_long, text: "012345678\nabcdef012345678\na", - source_file_start_pos: 0, lines: vec![0, 10, 26], multi_byte_chars: vec![], non_narrow_chars: vec![], @@ -63,7 +59,6 @@ test!( test!( case: newline_and_multi_byte_char_in_same_chunk, text: "01234β789\nbcdef0123456789abcdef", - source_file_start_pos: 0, lines: vec![0, 11], multi_byte_chars: vec![(5, 2)], non_narrow_chars: vec![], @@ -72,7 +67,6 @@ test!( test!( case: newline_and_control_char_in_same_chunk, text: "01234\u{07}6789\nbcdef0123456789abcdef", - source_file_start_pos: 0, lines: vec![0, 11], multi_byte_chars: vec![], non_narrow_chars: vec![(5, 0)], @@ -81,7 +75,6 @@ test!( test!( case: multi_byte_char_short, text: "aβc", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(1, 2)], non_narrow_chars: vec![], @@ -90,7 +83,6 @@ test!( test!( case: multi_byte_char_long, text: "0123456789abcΔf012345β", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(13, 2), (22, 2)], non_narrow_chars: vec![], @@ -99,7 +91,6 @@ test!( test!( case: multi_byte_char_across_chunk_boundary, text: "0123456789abcdeΔ123456789abcdef01234", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(15, 2)], non_narrow_chars: vec![], @@ -108,7 +99,6 @@ test!( test!( case: multi_byte_char_across_chunk_boundary_tail, text: "0123456789abcdeΔ....", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![(15, 2)], non_narrow_chars: vec![], @@ -117,7 +107,6 @@ test!( test!( case: non_narrow_short, text: "0\t2", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![], non_narrow_chars: vec![(1, 4)], @@ -126,7 +115,6 @@ test!( test!( case: non_narrow_long, text: "01\t3456789abcdef01234567\u{07}9", - source_file_start_pos: 0, lines: vec![0], multi_byte_chars: vec![], non_narrow_chars: vec![(2, 4), (24, 0)], @@ -135,8 +123,7 @@ test!( test!( case: output_offset_all, text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf", - source_file_start_pos: 1000, - lines: vec![0 + 1000, 7 + 1000, 27 + 1000], - multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)], - non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], + lines: vec![0, 7, 27], + multi_byte_chars: vec![(13, 2), (29, 2)], + non_narrow_chars: vec![(2, 4), (24, 0)], ); diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs index 886112769a9..fbfc5c22fcb 100644 --- a/compiler/rustc_span/src/caching_source_map_view.rs +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -1,5 +1,5 @@ use crate::source_map::SourceMap; -use crate::{BytePos, SourceFile, SpanData}; +use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData}; use rustc_data_structures::sync::Lrc; use std::ops::Range; @@ -37,6 +37,7 @@ impl CacheEntry { self.file_index = file_idx; } + let pos = self.file.relative_position(pos); let line_index = self.file.lookup_line(pos).unwrap(); let line_bounds = self.file.line_bounds(line_index); self.line_number = line_index + 1; @@ -79,7 +80,7 @@ impl<'sm> CachingSourceMapView<'sm> { pub fn byte_pos_to_line_and_col( &mut self, pos: BytePos, - ) -> Option<(Lrc<SourceFile>, usize, BytePos)> { + ) -> Option<(Lrc<SourceFile>, usize, RelativeBytePos)> { self.time_stamp += 1; // Check if the position is in one of the cached lines @@ -88,11 +89,8 @@ impl<'sm> CachingSourceMapView<'sm> { let cache_entry = &mut self.line_cache[cache_idx as usize]; cache_entry.touch(self.time_stamp); - return Some(( - cache_entry.file.clone(), - cache_entry.line_number, - pos - cache_entry.line.start, - )); + let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); + return Some((cache_entry.file.clone(), cache_entry.line_number, col)); } // No cache hit ... @@ -108,7 +106,8 @@ impl<'sm> CachingSourceMapView<'sm> { let cache_entry = &mut self.line_cache[oldest]; cache_entry.update(new_file_and_idx, pos, self.time_stamp); - Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line.start)) + let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); + Some((cache_entry.file.clone(), cache_entry.line_number, col)) } pub fn span_data_to_lines_and_cols( diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 62fe49fe2a2..b1cde3093ef 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1107,27 +1107,27 @@ impl fmt::Debug for SpanData { } /// Identifies an offset of a multi-byte character in a `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub struct MultiByteChar { /// The absolute offset of the character in the `SourceMap`. - pub pos: BytePos, + pub pos: RelativeBytePos, /// The number of bytes, `>= 2`. pub bytes: u8, } /// Identifies an offset of a non-narrow character in a `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub enum NonNarrowChar { /// Represents a zero-width character. - ZeroWidth(BytePos), + ZeroWidth(RelativeBytePos), /// Represents a wide (full-width) character. - Wide(BytePos), + Wide(RelativeBytePos), /// Represents a tab character, represented visually with a width of 4 characters. - Tab(BytePos), + Tab(RelativeBytePos), } impl NonNarrowChar { - fn new(pos: BytePos, width: usize) -> Self { + fn new(pos: RelativeBytePos, width: usize) -> Self { match width { 0 => NonNarrowChar::ZeroWidth(pos), 2 => NonNarrowChar::Wide(pos), @@ -1137,7 +1137,7 @@ impl NonNarrowChar { } /// Returns the absolute offset of the character in the `SourceMap`. - pub fn pos(&self) -> BytePos { + pub fn pos(&self) -> RelativeBytePos { match *self { NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p, } @@ -1153,10 +1153,10 @@ impl NonNarrowChar { } } -impl Add<BytePos> for NonNarrowChar { +impl Add<RelativeBytePos> for NonNarrowChar { type Output = Self; - fn add(self, rhs: BytePos) -> Self { + fn add(self, rhs: RelativeBytePos) -> Self { match self { NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs), NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs), @@ -1165,10 +1165,10 @@ impl Add<BytePos> for NonNarrowChar { } } -impl Sub<BytePos> for NonNarrowChar { +impl Sub<RelativeBytePos> for NonNarrowChar { type Output = Self; - fn sub(self, rhs: BytePos) -> Self { + fn sub(self, rhs: RelativeBytePos) -> Self { match self { NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs), NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs), @@ -1178,10 +1178,10 @@ impl Sub<BytePos> for NonNarrowChar { } /// Identifies an offset of a character that was normalized away from `SourceFile`. -#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)] pub struct NormalizedPos { /// The absolute offset of the character in the `SourceMap`. - pub pos: BytePos, + pub pos: RelativeBytePos, /// The difference between original and normalized string at position. pub diff: u32, } @@ -1293,7 +1293,7 @@ impl SourceFileHash { #[derive(Clone)] pub enum SourceFileLines { /// The source file lines, in decoded (random-access) form. - Lines(Vec<BytePos>), + Lines(Vec<RelativeBytePos>), /// The source file lines, in undecoded difference list form. Diffs(SourceFileDiffs), @@ -1317,7 +1317,7 @@ pub struct SourceFileDiffs { /// Position of the first line. Note that this is always encoded as a /// `BytePos` because it is often much larger than any of the /// differences. - line_start: BytePos, + line_start: RelativeBytePos, /// Always 1, 2, or 4. Always as small as possible, while being big /// enough to hold the length of the longest line in the source file. @@ -1352,7 +1352,7 @@ pub struct SourceFile { /// The start position of this source in the `SourceMap`. pub start_pos: BytePos, /// The end position of this source in the `SourceMap`. - pub end_pos: BytePos, + pub source_len: RelativeBytePos, /// Locations of lines beginnings in the source code. pub lines: Lock<SourceFileLines>, /// Locations of multi-byte characters in the source code. @@ -1375,7 +1375,7 @@ impl Clone for SourceFile { src_hash: self.src_hash, external_src: Lock::new(self.external_src.borrow().clone()), start_pos: self.start_pos, - end_pos: self.end_pos, + source_len: self.source_len, lines: Lock::new(self.lines.borrow().clone()), multibyte_chars: self.multibyte_chars.clone(), non_narrow_chars: self.non_narrow_chars.clone(), @@ -1390,8 +1390,8 @@ impl<S: Encoder> Encodable<S> for SourceFile { fn encode(&self, s: &mut S) { self.name.encode(s); self.src_hash.encode(s); - self.start_pos.encode(s); - self.end_pos.encode(s); + // Do not encode `start_pos` as it's global state for this session. + self.source_len.encode(s); // We are always in `Lines` form by the time we reach here. assert!(self.lines.borrow().is_lines()); @@ -1465,8 +1465,7 @@ impl<D: Decoder> Decodable<D> for SourceFile { fn decode(d: &mut D) -> SourceFile { let name: FileName = Decodable::decode(d); let src_hash: SourceFileHash = Decodable::decode(d); - let start_pos: BytePos = Decodable::decode(d); - let end_pos: BytePos = Decodable::decode(d); + let source_len: RelativeBytePos = Decodable::decode(d); let lines = { let num_lines: u32 = Decodable::decode(d); if num_lines > 0 { @@ -1474,7 +1473,7 @@ impl<D: Decoder> Decodable<D> for SourceFile { let bytes_per_diff = d.read_u8() as usize; // Read the first element. - let line_start: BytePos = Decodable::decode(d); + let line_start: RelativeBytePos = Decodable::decode(d); // Read the difference list. let num_diffs = num_lines as usize - 1; @@ -1496,8 +1495,8 @@ impl<D: Decoder> Decodable<D> for SourceFile { let cnum: CrateNum = Decodable::decode(d); SourceFile { name, - start_pos, - end_pos, + start_pos: BytePos::from_u32(0), + source_len, src: None, src_hash, // Unused - the metadata decoder will construct @@ -1520,34 +1519,29 @@ impl fmt::Debug for SourceFile { } impl SourceFile { - pub fn new( - name: FileName, - mut src: String, - start_pos: BytePos, - hash_kind: SourceFileHashAlgorithm, - ) -> Self { + pub fn new(name: FileName, mut src: String, hash_kind: SourceFileHashAlgorithm) -> Self { // Compute the file hash before any normalization. let src_hash = SourceFileHash::new(hash_kind, &src); - let normalized_pos = normalize_src(&mut src, start_pos); + let normalized_pos = normalize_src(&mut src); let name_hash = { let mut hasher: StableHasher = StableHasher::new(); name.hash(&mut hasher); hasher.finish() }; - let end_pos = start_pos.to_usize() + src.len(); - assert!(end_pos <= u32::MAX as usize); + let source_len = src.len(); + assert!(source_len <= u32::MAX as usize); let (lines, multibyte_chars, non_narrow_chars) = - analyze_source_file::analyze_source_file(&src, start_pos); + analyze_source_file::analyze_source_file(&src); SourceFile { name, src: Some(Lrc::new(src)), src_hash, external_src: Lock::new(ExternalSource::Unneeded), - start_pos, - end_pos: Pos::from_usize(end_pos), + start_pos: BytePos::from_u32(0), + source_len: RelativeBytePos::from_usize(source_len), lines: Lock::new(SourceFileLines::Lines(lines)), multibyte_chars, non_narrow_chars, @@ -1559,7 +1553,7 @@ impl SourceFile { pub fn lines<F, R>(&self, f: F) -> R where - F: FnOnce(&[BytePos]) -> R, + F: FnOnce(&[RelativeBytePos]) -> R, { let mut guard = self.lines.borrow_mut(); match &*guard { @@ -1579,7 +1573,7 @@ impl SourceFile { match bytes_per_diff { 1 => { lines.extend(raw_diffs.into_iter().map(|&diff| { - line_start = line_start + BytePos(diff as u32); + line_start = line_start + RelativeBytePos(diff as u32); line_start })); } @@ -1588,7 +1582,7 @@ impl SourceFile { let pos = bytes_per_diff * i; let bytes = [raw_diffs[pos], raw_diffs[pos + 1]]; let diff = u16::from_le_bytes(bytes); - line_start = line_start + BytePos(diff as u32); + line_start = line_start + RelativeBytePos(diff as u32); line_start })); } @@ -1602,7 +1596,7 @@ impl SourceFile { raw_diffs[pos + 3], ]; let diff = u32::from_le_bytes(bytes); - line_start = line_start + BytePos(diff); + line_start = line_start + RelativeBytePos(diff); line_start })); } @@ -1617,8 +1611,10 @@ impl SourceFile { /// Returns the `BytePos` of the beginning of the current line. pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { + let pos = self.relative_position(pos); let line_index = self.lookup_line(pos).unwrap(); - self.lines(|lines| lines[line_index]) + let line_start_pos = self.lines(|lines| lines[line_index]); + self.absolute_position(line_start_pos) } /// Add externally loaded source. @@ -1643,7 +1639,7 @@ impl SourceFile { if let Some(mut src) = src { // The src_hash needs to be computed on the pre-normalized src. if self.src_hash.matches(&src) { - normalize_src(&mut src, BytePos::from_usize(0)); + normalize_src(&mut src); *src_kind = ExternalSourceKind::Present(Lrc::new(src)); return true; } @@ -1676,8 +1672,7 @@ impl SourceFile { let begin = { let line = self.lines(|lines| lines.get(line_number).copied())?; - let begin: BytePos = line - self.start_pos; - begin.to_usize() + line.to_usize() }; if let Some(ref src) = self.src { @@ -1703,25 +1698,41 @@ impl SourceFile { self.lines(|lines| lines.len()) } + #[inline] + pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos { + BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32()) + } + + #[inline] + pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos { + RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32()) + } + + #[inline] + pub fn end_position(&self) -> BytePos { + self.absolute_position(self.source_len) + } + /// Finds the line containing the given position. The return value is the /// index into the `lines` array of this `SourceFile`, not the 1-based line /// number. If the source_file is empty or the position is located before the /// first line, `None` is returned. - pub fn lookup_line(&self, pos: BytePos) -> Option<usize> { + pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> { self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1)) } pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> { if self.is_empty() { - return self.start_pos..self.end_pos; + return self.start_pos..self.start_pos; } self.lines(|lines| { assert!(line_index < lines.len()); if line_index == (lines.len() - 1) { - lines[line_index]..self.end_pos + self.absolute_position(lines[line_index])..self.end_position() } else { - lines[line_index]..lines[line_index + 1] + self.absolute_position(lines[line_index]) + ..self.absolute_position(lines[line_index + 1]) } }) } @@ -1732,17 +1743,19 @@ impl SourceFile { /// returns true still contain one byte position according to this function. #[inline] pub fn contains(&self, byte_pos: BytePos) -> bool { - byte_pos >= self.start_pos && byte_pos <= self.end_pos + byte_pos >= self.start_pos && byte_pos <= self.end_position() } #[inline] pub fn is_empty(&self) -> bool { - self.start_pos == self.end_pos + self.source_len.to_u32() == 0 } /// Calculates the original byte position relative to the start of the file /// based on the given byte position. - pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos { + pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos { + let pos = self.relative_position(pos); + // Diff before any records is 0. Otherwise use the previously recorded // diff as that applies to the following characters until a new diff // is recorded. @@ -1752,7 +1765,7 @@ impl SourceFile { Err(i) => self.normalized_pos[i - 1].diff, }; - BytePos::from_u32(pos.0 - self.start_pos.0 + diff) + RelativeBytePos::from_u32(pos.0 + diff) } /// Calculates a normalized byte position from a byte offset relative to the @@ -1778,7 +1791,7 @@ impl SourceFile { } /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`. - pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { + fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos { // The number of extra bytes due to multibyte chars in the `SourceFile`. let mut total_extra_bytes = 0; @@ -1796,13 +1809,13 @@ impl SourceFile { } } - assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32()); - CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize) + assert!(total_extra_bytes <= bpos.to_u32()); + CharPos(bpos.to_usize() - total_extra_bytes as usize) } /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a /// given `BytePos`. - pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) { + fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) { let chpos = self.bytepos_to_file_charpos(pos); match self.lookup_line(pos) { Some(a) => { @@ -1823,6 +1836,7 @@ impl SourceFile { /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based) /// column offset when displayed, for a given `BytePos`. pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) { + let pos = self.relative_position(pos); let (line, col_or_chpos) = self.lookup_file_pos(pos); if line > 0 { let col = col_or_chpos; @@ -1861,16 +1875,10 @@ impl SourceFile { } /// Normalizes the source code and records the normalizations. -fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> { +fn normalize_src(src: &mut String) -> Vec<NormalizedPos> { let mut normalized_pos = vec![]; remove_bom(src, &mut normalized_pos); normalize_newlines(src, &mut normalized_pos); - - // Offset all the positions by start_pos to match the final file positions. - for np in &mut normalized_pos { - np.pos.0 += start_pos.0; - } - normalized_pos } @@ -1878,7 +1886,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> { fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { if src.starts_with('\u{feff}') { src.drain(..3); - normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 }); + normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 }); } } @@ -1913,7 +1921,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) cursor += idx - gap_len; gap_len += 1; normalized_pos.push(NormalizedPos { - pos: BytePos::from_usize(cursor + 1), + pos: RelativeBytePos::from_usize(cursor + 1), diff: original_gap + gap_len as u32, }); } @@ -2015,6 +2023,12 @@ impl_pos! { #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] pub struct BytePos(pub u32); + /// A byte offset relative to file beginning. + /// + /// Keep this small (currently 32-bits), as AST contains a lot of them. + #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] + pub struct RelativeBytePos(pub u32); + /// A character offset. /// /// Because of multibyte UTF-8 characters, a byte offset @@ -2036,6 +2050,24 @@ impl<D: Decoder> Decodable<D> for BytePos { } } +impl<H: HashStableContext> HashStable<H> for RelativeBytePos { + fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) { + self.0.hash_stable(hcx, hasher); + } +} + +impl<S: Encoder> Encodable<S> for RelativeBytePos { + fn encode(&self, s: &mut S) { + s.emit_u32(self.0); + } +} + +impl<D: Decoder> Decodable<D> for RelativeBytePos { + fn decode(d: &mut D) -> RelativeBytePos { + RelativeBytePos(d.read_u32()) + } +} + // _____________________________________________________________________________ // Loc, SourceFileAndLine, SourceFileAndBytePos // diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 1cff021ba41..07483280f41 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -314,21 +314,18 @@ impl SourceMap { let lrc_sf = match self.source_file_by_stable_id(file_id) { Some(lrc_sf) => lrc_sf, None => { - let start_pos = self.allocate_address_space(src.len())?; - - let source_file = Lrc::new(SourceFile::new( - filename, - src, - Pos::from_usize(start_pos), - self.hash_kind, - )); + let mut source_file = SourceFile::new(filename, src, self.hash_kind); // Let's make sure the file_id we generated above actually matches // the ID we generate for the SourceFile we just created. debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); + let start_pos = self.allocate_address_space(source_file.source_len.to_usize())?; + source_file.start_pos = BytePos::from_usize(start_pos); + let mut files = self.files.borrow_mut(); + let source_file = Lrc::new(source_file); files.source_files.push(source_file.clone()); files.stable_id_to_source_file.insert(file_id, source_file.clone()); @@ -350,48 +347,16 @@ impl SourceMap { source_len: usize, cnum: CrateNum, file_local_lines: Lock<SourceFileLines>, - mut file_local_multibyte_chars: Vec<MultiByteChar>, - mut file_local_non_narrow_chars: Vec<NonNarrowChar>, - mut file_local_normalized_pos: Vec<NormalizedPos>, - original_start_pos: BytePos, + multibyte_chars: Vec<MultiByteChar>, + non_narrow_chars: Vec<NonNarrowChar>, + normalized_pos: Vec<NormalizedPos>, metadata_index: u32, ) -> Lrc<SourceFile> { let start_pos = self .allocate_address_space(source_len) .expect("not enough address space for imported source file"); - let end_pos = Pos::from_usize(start_pos + source_len); - let start_pos = Pos::from_usize(start_pos); - - // Translate these positions into the new global frame of reference, - // now that the offset of the SourceFile is known. - // - // These are all unsigned values. `original_start_pos` may be larger or - // smaller than `start_pos`, but `pos` is always larger than both. - // Therefore, `(pos - original_start_pos) + start_pos` won't overflow - // but `start_pos - original_start_pos` might. So we use the former - // form rather than pre-computing the offset into a local variable. The - // compiler backend can optimize away the repeated computations in a - // way that won't trigger overflow checks. - match &mut *file_local_lines.borrow_mut() { - SourceFileLines::Lines(lines) => { - for pos in lines { - *pos = (*pos - original_start_pos) + start_pos; - } - } - SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => { - *line_start = (*line_start - original_start_pos) + start_pos; - } - } - for mbc in &mut file_local_multibyte_chars { - mbc.pos = (mbc.pos - original_start_pos) + start_pos; - } - for swc in &mut file_local_non_narrow_chars { - *swc = (*swc - original_start_pos) + start_pos; - } - for nc in &mut file_local_normalized_pos { - nc.pos = (nc.pos - original_start_pos) + start_pos; - } + let source_len = RelativeBytePos::from_usize(source_len); let source_file = Lrc::new(SourceFile { name: filename, @@ -401,12 +366,12 @@ impl SourceMap { kind: ExternalSourceKind::AbsentOk, metadata_index, }), - start_pos, - end_pos, + start_pos: BytePos::from_usize(start_pos), + source_len, lines: file_local_lines, - multibyte_chars: file_local_multibyte_chars, - non_narrow_chars: file_local_non_narrow_chars, - normalized_pos: file_local_normalized_pos, + multibyte_chars, + non_narrow_chars, + normalized_pos, name_hash, cnum, }); @@ -452,6 +417,7 @@ impl SourceMap { pub fn lookup_line(&self, pos: BytePos) -> Result<SourceFileAndLine, Lrc<SourceFile>> { let f = self.lookup_source_file(pos); + let pos = f.relative_position(pos); match f.lookup_line(pos) { Some(line) => Ok(SourceFileAndLine { sf: f, line }), None => Err(f), @@ -547,7 +513,9 @@ impl SourceMap { return true; } let f = (*self.files.borrow().source_files)[lo].clone(); - f.lookup_line(sp.lo()) != f.lookup_line(sp.hi()) + let lo = f.relative_position(sp.lo()); + let hi = f.relative_position(sp.hi()); + f.lookup_line(lo) != f.lookup_line(hi) } #[instrument(skip(self), level = "trace")] @@ -627,7 +595,7 @@ impl SourceMap { let start_index = local_begin.pos.to_usize(); let end_index = local_end.pos.to_usize(); - let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + let source_len = local_begin.sf.source_len.to_usize(); if start_index > end_index || end_index > source_len { return Err(SpanSnippetError::MalformedForSourcemap(MalformedSourceMapPositions { @@ -1034,7 +1002,7 @@ impl SourceMap { return 1; } - let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); + let source_len = local_begin.sf.source_len.to_usize(); debug!("source_len=`{:?}`", source_len); // Ensure indexes are also not malformed. if start_index > end_index || end_index > source_len - 1 { diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 686b3b00d70..d5cb3dac55c 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -50,6 +50,7 @@ impl SourceMap { fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { let idx = self.lookup_source_file_idx(bpos); let sf = &(*self.files.borrow().source_files)[idx]; + let bpos = sf.relative_position(bpos); sf.bytepos_to_file_charpos(bpos) } } @@ -230,8 +231,7 @@ fn t10() { let SourceFile { name, src_hash, - start_pos, - end_pos, + source_len, lines, multibyte_chars, non_narrow_chars, @@ -244,13 +244,12 @@ fn t10() { name, src_hash, name_hash, - (end_pos - start_pos).to_usize(), + source_len.to_usize(), CrateNum::new(0), lines, multibyte_chars, non_narrow_chars, normalized_pos, - start_pos, 0, ); diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs index a242ad6d1d7..3b69295bb93 100644 --- a/compiler/rustc_span/src/tests.rs +++ b/compiler/rustc_span/src/tests.rs @@ -3,24 +3,22 @@ use super::*; #[test] fn test_lookup_line() { let source = "abcdefghijklm\nabcdefghij\n...".to_owned(); - let sf = SourceFile::new( - FileName::Anon(Hash64::ZERO), - source, - BytePos(3), - SourceFileHashAlgorithm::Sha256, - ); - sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)])); + let mut sf = + SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256); + sf.start_pos = BytePos(3); + sf.lines(|lines| { + assert_eq!(lines, &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]) + }); - assert_eq!(sf.lookup_line(BytePos(0)), None); - assert_eq!(sf.lookup_line(BytePos(3)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(4)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(0)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(1)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(16)), Some(0)); - assert_eq!(sf.lookup_line(BytePos(17)), Some(1)); - assert_eq!(sf.lookup_line(BytePos(18)), Some(1)); + assert_eq!(sf.lookup_line(RelativeBytePos(13)), Some(0)); + assert_eq!(sf.lookup_line(RelativeBytePos(14)), Some(1)); + assert_eq!(sf.lookup_line(RelativeBytePos(15)), Some(1)); - assert_eq!(sf.lookup_line(BytePos(28)), Some(2)); - assert_eq!(sf.lookup_line(BytePos(29)), Some(2)); + assert_eq!(sf.lookup_line(RelativeBytePos(25)), Some(2)); + assert_eq!(sf.lookup_line(RelativeBytePos(26)), Some(2)); } #[test] |
