diff options
Diffstat (limited to 'src/libsyntax_pos/lib.rs')
| -rw-r--r-- | src/libsyntax_pos/lib.rs | 64 |
1 files changed, 60 insertions, 4 deletions
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index a307a5c0b37..9034f8c1afd 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -855,6 +855,15 @@ impl Sub<BytePos> for NonNarrowChar { } } +/// Identifies an offset of a character that was normalized away from `SourceFile`. +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)] +pub struct NormalizedPos { + /// The absolute offset of the character in the `SourceMap`. + pub pos: BytePos, + /// The difference between original and normalized string at position. + pub diff: u32, +} + /// The state of the lazy external source loading mechanism of a `SourceFile`. #[derive(PartialEq, Eq, Clone)] pub enum ExternalSource { @@ -918,6 +927,8 @@ pub struct SourceFile { pub multibyte_chars: Vec<MultiByteChar>, /// Width of characters that are not narrow in the source code. pub non_narrow_chars: Vec<NonNarrowChar>, + /// Locations of characters removed during normalization. + pub normalized_pos: Vec<NormalizedPos>, /// A hash of the filename, used for speeding up hashing in incremental compilation. pub name_hash: u128, } @@ -984,6 +995,9 @@ impl Encodable for SourceFile { })?; s.emit_struct_field("name_hash", 8, |s| { self.name_hash.encode(s) + })?; + s.emit_struct_field("normalized_pos", 9, |s| { + self.normalized_pos.encode(s) }) }) } @@ -1034,6 +1048,8 @@ impl Decodable for SourceFile { d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?; let name_hash: u128 = d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?; + let normalized_pos: Vec<NormalizedPos> = + d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?; Ok(SourceFile { name, name_was_remapped, @@ -1050,6 +1066,7 @@ impl Decodable for SourceFile { lines, multibyte_chars, non_narrow_chars, + normalized_pos, name_hash, }) }) @@ -1068,8 +1085,7 @@ impl SourceFile { unmapped_path: FileName, mut src: String, start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> { - remove_bom(&mut src); - normalize_newlines(&mut src); + let normalized_pos = normalize_src(&mut src, start_pos); let src_hash = { let mut hasher: StableHasher = StableHasher::new(); @@ -1102,6 +1118,7 @@ impl SourceFile { lines, multibyte_chars, non_narrow_chars, + normalized_pos, name_hash, }) } @@ -1228,12 +1245,44 @@ impl SourceFile { pub fn contains(&self, byte_pos: BytePos) -> bool { byte_pos >= self.start_pos && byte_pos <= self.end_pos } + + /// Calculates the original byte position relative to the start of the file + /// based on the given byte position. + pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos { + + // Diff before any records is 0. Otherwise use the previously recorded + // diff as that applies to the following characters until a new diff + // is recorded. + let diff = match self.normalized_pos.binary_search_by( + |np| np.pos.cmp(&pos)) { + Ok(i) => self.normalized_pos[i].diff, + Err(i) if i == 0 => 0, + Err(i) => self.normalized_pos[i-1].diff, + }; + + BytePos::from_u32(pos.0 - self.start_pos.0 + diff) + } +} + +/// Normalizes the source code and records the normalizations. +fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> { + let mut normalized_pos = vec![]; + remove_bom(src, &mut normalized_pos); + normalize_newlines(src, &mut normalized_pos); + + // Offset all the positions by start_pos to match the final file positions. + for np in &mut normalized_pos { + np.pos.0 += start_pos.0; + } + + normalized_pos } /// Removes UTF-8 BOM, if any. -fn remove_bom(src: &mut String) { +fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { if src.starts_with("\u{feff}") { src.drain(..3); + normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 }); } } @@ -1241,7 +1290,7 @@ fn remove_bom(src: &mut String) { /// Replaces `\r\n` with `\n` in-place in `src`. /// /// Returns error if there's a lone `\r` in the string -fn normalize_newlines(src: &mut String) { +fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { if !src.as_bytes().contains(&b'\r') { return; } @@ -1254,6 +1303,8 @@ fn normalize_newlines(src: &mut String) { let mut buf = std::mem::replace(src, String::new()).into_bytes(); let mut gap_len = 0; let mut tail = buf.as_mut_slice(); + let mut cursor = 0; + let original_gap = normalized_pos.last().map_or(0, |l| l.diff); loop { let idx = match find_crlf(&tail[gap_len..]) { None => tail.len(), @@ -1264,7 +1315,12 @@ fn normalize_newlines(src: &mut String) { if tail.len() == gap_len { break; } + cursor += idx - gap_len; gap_len += 1; + normalized_pos.push(NormalizedPos { + pos: BytePos::from_usize(cursor + 1), + diff: original_gap + gap_len as u32, + }); } // Account for removed `\r`. |
