about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-09-21 02:50:54 +0000
committerbors <bors@rust-lang.org>2023-09-21 02:50:54 +0000
commit4fda889bf8735755573b27e6116ce025f3ded5f9 (patch)
tree320dfacc8bd8990b5a8e046f48a7cf6131ec11ff
parentcbce15c6173cd0fcd4abe25c108067d32f1135b4 (diff)
parent5f33647fb392c2d4f71af94a223bc1a9abb80643 (diff)
downloadrust-4fda889bf8735755573b27e6116ce025f3ded5f9.tar.gz
rust-4fda889bf8735755573b27e6116ce025f3ded5f9.zip
Auto merge of #115549 - saethlin:include-bytes-resilient, r=jackh726
Fall back to the unoptimized implementation in read_binary_file if File::metadata lies

Fixes https://github.com/rust-lang/rust/issues/115458

r? `@jackh726` because you approved the previous PR
-rw-r--r--compiler/rustc_span/src/source_map.rs33
-rw-r--r--compiler/rustc_span/src/source_map/tests.rs27
2 files changed, 58 insertions, 2 deletions
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index 68727a6c40e..0b575c13adf 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -127,10 +127,39 @@ impl FileLoader for RealFileLoader {
 
         let mut bytes = Lrc::new_uninit_slice(len as usize);
         let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap());
-        file.read_buf_exact(buf.unfilled())?;
+        match file.read_buf_exact(buf.unfilled()) {
+            Ok(()) => {}
+            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
+                drop(bytes);
+                return fs::read(path).map(Vec::into);
+            }
+            Err(e) => return Err(e),
+        }
         // SAFETY: If the read_buf_exact call returns Ok(()), then we have
         // read len bytes and initialized the buffer.
-        Ok(unsafe { bytes.assume_init() })
+        let bytes = unsafe { bytes.assume_init() };
+
+        // At this point, we've read all the bytes that filesystem metadata reported exist.
+        // But we are not guaranteed to be at the end of the file, because we did not attempt to do
+        // a read with a non-zero-sized buffer and get Ok(0).
+        // So we do small read to a fixed-size buffer. If the read returns no bytes then we're
+        // already done, and we just return the Lrc we built above.
+        // If the read returns bytes however, we just fall back to reading into a Vec then turning
+        // that into an Lrc, losing our nice peak memory behavior. This fallback code path should
+        // be rarely exercised.
+
+        let mut probe = [0u8; 32];
+        let n = loop {
+            match file.read(&mut probe) {
+                Ok(0) => return Ok(bytes),
+                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                Err(e) => return Err(e),
+                Ok(n) => break n,
+            }
+        };
+        let mut bytes: Vec<u8> = bytes.iter().copied().chain(probe[..n].iter().copied()).collect();
+        file.read_to_end(&mut bytes)?;
+        Ok(bytes.into())
     }
 }
 
diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs
index e393db02064..a12f50c87a2 100644
--- a/compiler/rustc_span/src/source_map/tests.rs
+++ b/compiler/rustc_span/src/source_map/tests.rs
@@ -567,3 +567,30 @@ fn test_next_point() {
     assert_eq!(span.hi().0, 6);
     assert!(sm.span_to_snippet(span).is_err());
 }
+
+#[cfg(target_os = "linux")]
+#[test]
+fn read_binary_file_handles_lying_stat() {
+    // read_binary_file tries to read the contents of a file into an Lrc<[u8]> while
+    // never having two copies of the data in memory at once. This is an optimization
+    // to support include_bytes! with large files. But since Rust allocators are
+    // sensitive to alignment, our implementation can't be bootstrapped off calling
+    // std::fs::read. So we test that we have the same behavior even on files where
+    // fs::metadata lies.
+
+    // stat always says that /proc/self/cmdline is length 0, but it isn't.
+    let cmdline = Path::new("/proc/self/cmdline");
+    let len = std::fs::metadata(cmdline).unwrap().len() as usize;
+    let real = std::fs::read(cmdline).unwrap();
+    assert!(len < real.len());
+    let bin = RealFileLoader.read_binary_file(cmdline).unwrap();
+    assert_eq!(&real[..], &bin[..]);
+
+    // stat always says that /sys/devices/system/cpu/kernel_max is the size of a block.
+    let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max");
+    let len = std::fs::metadata(kernel_max).unwrap().len() as usize;
+    let real = std::fs::read(kernel_max).unwrap();
+    assert!(len > real.len());
+    let bin = RealFileLoader.read_binary_file(kernel_max).unwrap();
+    assert_eq!(&real[..], &bin[..]);
+}