diff options
Diffstat (limited to 'src/libsyntax_pos')
| -rw-r--r-- | src/libsyntax_pos/lib.rs | 64 | ||||
| -rw-r--r-- | src/libsyntax_pos/tests.rs | 31 |
2 files changed, 78 insertions, 17 deletions
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index a307a5c0b37..9034f8c1afd 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -855,6 +855,15 @@ impl Sub<BytePos> for NonNarrowChar { } } +/// Identifies an offset of a character that was normalized away from `SourceFile`. +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)] +pub struct NormalizedPos { + /// The absolute offset of the character in the `SourceMap`. + pub pos: BytePos, + /// The difference between original and normalized string at position. + pub diff: u32, +} + /// The state of the lazy external source loading mechanism of a `SourceFile`. #[derive(PartialEq, Eq, Clone)] pub enum ExternalSource { @@ -918,6 +927,8 @@ pub struct SourceFile { pub multibyte_chars: Vec<MultiByteChar>, /// Width of characters that are not narrow in the source code. pub non_narrow_chars: Vec<NonNarrowChar>, + /// Locations of characters removed during normalization. + pub normalized_pos: Vec<NormalizedPos>, /// A hash of the filename, used for speeding up hashing in incremental compilation. pub name_hash: u128, } @@ -984,6 +995,9 @@ impl Encodable for SourceFile { })?; s.emit_struct_field("name_hash", 8, |s| { self.name_hash.encode(s) + })?; + s.emit_struct_field("normalized_pos", 9, |s| { + self.normalized_pos.encode(s) }) }) } @@ -1034,6 +1048,8 @@ impl Decodable for SourceFile { d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?; let name_hash: u128 = d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?; + let normalized_pos: Vec<NormalizedPos> = + d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?; Ok(SourceFile { name, name_was_remapped, @@ -1050,6 +1066,7 @@ impl Decodable for SourceFile { lines, multibyte_chars, non_narrow_chars, + normalized_pos, name_hash, }) }) @@ -1068,8 +1085,7 @@ impl SourceFile { unmapped_path: FileName, mut src: String, start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> { - remove_bom(&mut src); - normalize_newlines(&mut src); + let normalized_pos = normalize_src(&mut src, start_pos); let src_hash = { let mut hasher: StableHasher = StableHasher::new(); @@ -1102,6 +1118,7 @@ impl SourceFile { lines, multibyte_chars, non_narrow_chars, + normalized_pos, name_hash, }) } @@ -1228,12 +1245,44 @@ impl SourceFile { pub fn contains(&self, byte_pos: BytePos) -> bool { byte_pos >= self.start_pos && byte_pos <= self.end_pos } + + /// Calculates the original byte position relative to the start of the file + /// based on the given byte position. + pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos { + + // Diff before any records is 0. Otherwise use the previously recorded + // diff as that applies to the following characters until a new diff + // is recorded. + let diff = match self.normalized_pos.binary_search_by( + |np| np.pos.cmp(&pos)) { + Ok(i) => self.normalized_pos[i].diff, + Err(i) if i == 0 => 0, + Err(i) => self.normalized_pos[i-1].diff, + }; + + BytePos::from_u32(pos.0 - self.start_pos.0 + diff) + } +} + +/// Normalizes the source code and records the normalizations. +fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> { + let mut normalized_pos = vec![]; + remove_bom(src, &mut normalized_pos); + normalize_newlines(src, &mut normalized_pos); + + // Offset all the positions by start_pos to match the final file positions. + for np in &mut normalized_pos { + np.pos.0 += start_pos.0; + } + + normalized_pos } /// Removes UTF-8 BOM, if any. -fn remove_bom(src: &mut String) { +fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { if src.starts_with("\u{feff}") { src.drain(..3); + normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 }); } } @@ -1241,7 +1290,7 @@ fn remove_bom(src: &mut String) { /// Replaces `\r\n` with `\n` in-place in `src`. /// /// Returns error if there's a lone `\r` in the string -fn normalize_newlines(src: &mut String) { +fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { if !src.as_bytes().contains(&b'\r') { return; } @@ -1254,6 +1303,8 @@ fn normalize_newlines(src: &mut String) { let mut buf = std::mem::replace(src, String::new()).into_bytes(); let mut gap_len = 0; let mut tail = buf.as_mut_slice(); + let mut cursor = 0; + let original_gap = normalized_pos.last().map_or(0, |l| l.diff); loop { let idx = match find_crlf(&tail[gap_len..]) { None => tail.len(), @@ -1264,7 +1315,12 @@ fn normalize_newlines(src: &mut String) { if tail.len() == gap_len { break; } + cursor += idx - gap_len; gap_len += 1; + normalized_pos.push(NormalizedPos { + pos: BytePos::from_usize(cursor + 1), + diff: original_gap + gap_len as u32, + }); } // Account for removed `\r`. diff --git a/src/libsyntax_pos/tests.rs b/src/libsyntax_pos/tests.rs index 6bd6016020a..87cc3505e38 100644 --- a/src/libsyntax_pos/tests.rs +++ b/src/libsyntax_pos/tests.rs @@ -19,20 +19,25 @@ fn test_lookup_line() { #[test] fn test_normalize_newlines() { - fn check(before: &str, after: &str) { + fn check(before: &str, after: &str, expected_positions: &[u32]) { let mut actual = before.to_string(); - normalize_newlines(&mut actual); + let mut actual_positions = vec![]; + normalize_newlines(&mut actual, &mut actual_positions); + let actual_positions : Vec<_> = actual_positions + .into_iter() + .map(|nc| nc.pos.0).collect(); assert_eq!(actual.as_str(), after); + assert_eq!(actual_positions, expected_positions); } - check("", ""); - check("\n", "\n"); - check("\r", "\r"); - check("\r\r", "\r\r"); - check("\r\n", "\n"); - check("hello world", "hello world"); - check("hello\nworld", "hello\nworld"); - check("hello\r\nworld", "hello\nworld"); - check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n"); - check("\r\r\n", "\r\n"); - check("hello\rworld", "hello\rworld"); + check("", "", &[]); + check("\n", "\n", &[]); + check("\r", "\r", &[]); + check("\r\r", "\r\r", &[]); + check("\r\n", "\n", &[1]); + check("hello world", "hello world", &[]); + check("hello\nworld", "hello\nworld", &[]); + check("hello\r\nworld", "hello\nworld", &[6]); + check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]); + check("\r\r\n", "\r\n", &[2]); + check("hello\rworld", "hello\rworld", &[]); } |
