diff options
| author | bors <bors@rust-lang.org> | 2019-08-18 04:37:01 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2019-08-18 04:37:01 +0000 |
| commit | ef1ecbefb8719e408150738664d443a73f757ffd (patch) | |
| tree | ee8573eb5d0c96c61503f605f1dc5448b333ce9f /src/libsyntax_pos | |
| parent | fc8765d6d8623b2b5b4ca1d526ed1d7beb3fce18 (diff) | |
| parent | 911398b96cc4825798c0887ec6ebce775ff5d2d1 (diff) | |
| download | rust-ef1ecbefb8719e408150738664d443a73f757ffd.tar.gz rust-ef1ecbefb8719e408150738664d443a73f757ffd.zip | |
Auto merge of #62948 - matklad:failable-file-loading, r=petrochenkov
Normalize newlines when loading files Fixes #62865
Diffstat (limited to 'src/libsyntax_pos')
| -rw-r--r-- | src/libsyntax_pos/lib.rs | 56 | ||||
| -rw-r--r-- | src/libsyntax_pos/tests.rs | 20 |
2 files changed, 76 insertions, 0 deletions
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index aa36fe27d8e..a17cd7625fb 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -1043,6 +1043,7 @@ impl SourceFile { mut src: String, start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> { remove_bom(&mut src); + normalize_newlines(&mut src); let src_hash = { let mut hasher: StableHasher<u128> = StableHasher::new(); @@ -1210,6 +1211,61 @@ fn remove_bom(src: &mut String) { } } + +/// Replaces `\r\n` with `\n` in-place in `src`. +/// +/// Returns error if there's a lone `\r` in the string +fn normalize_newlines(src: &mut String) { + if !src.as_bytes().contains(&b'\r') { + return; + } + + // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding. + // While we *can* call `as_mut_vec` and do surgery on the live string + // directly, let's rather steal the contents of `src`. This makes the code + // safe even if a panic occurs. + + let mut buf = std::mem::replace(src, String::new()).into_bytes(); + let mut gap_len = 0; + let mut tail = buf.as_mut_slice(); + loop { + let idx = match find_crlf(&tail[gap_len..]) { + None => tail.len(), + Some(idx) => idx + gap_len, + }; + tail.copy_within(gap_len..idx, 0); + tail = &mut tail[idx - gap_len..]; + if tail.len() == gap_len { + break; + } + gap_len += 1; + } + + // Account for removed `\r`. + // After `set_len`, `buf` is guaranteed to contain utf-8 again. + let new_len = buf.len() - gap_len; + unsafe { + buf.set_len(new_len); + *src = String::from_utf8_unchecked(buf); + } + + fn find_crlf(src: &[u8]) -> Option<usize> { + let mut search_idx = 0; + while let Some(idx) = find_cr(&src[search_idx..]) { + if src[search_idx..].get(idx + 1) != Some(&b'\n') { + search_idx += idx + 1; + continue; + } + return Some(search_idx + idx); + } + None + } + + fn find_cr(src: &[u8]) -> Option<usize> { + src.iter().position(|&b| b == b'\r') + } +} + // _____________________________________________________________________________ // Pos, BytePos, CharPos // diff --git a/src/libsyntax_pos/tests.rs b/src/libsyntax_pos/tests.rs index 78c4e18e6ae..6bd6016020a 100644 --- a/src/libsyntax_pos/tests.rs +++ b/src/libsyntax_pos/tests.rs @@ -16,3 +16,23 @@ fn test_lookup_line() { assert_eq!(lookup_line(lines, BytePos(28)), 2); assert_eq!(lookup_line(lines, BytePos(29)), 2); } + +#[test] +fn test_normalize_newlines() { + fn check(before: &str, after: &str) { + let mut actual = before.to_string(); + normalize_newlines(&mut actual); + assert_eq!(actual.as_str(), after); + } + check("", ""); + check("\n", "\n"); + check("\r", "\r"); + check("\r\r", "\r\r"); + check("\r\n", "\n"); + check("hello world", "hello world"); + check("hello\nworld", "hello\nworld"); + check("hello\r\nworld", "hello\nworld"); + check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n"); + check("\r\r\n", "\r\n"); + check("hello\rworld", "hello\rworld"); +} |
