about summary refs log tree commit diff
path: root/src/libstd/sys/unix
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2018-05-31 17:47:12 +0000
committerbors <bors@rust-lang.org>2018-05-31 17:47:12 +0000
commit5342d40c1f49ef82ebff4c30fdad9f3b6fd339c1 (patch)
tree8515fd1199d67cc74b579cffe04c89276ea4791c /src/libstd/sys/unix
parent6de4ec679d7179251bef205427d52d093c40a787 (diff)
parent8dec03b71af22a160803c241b6812b8e54ee9671 (diff)
downloadrust-5342d40c1f49ef82ebff4c30fdad9f3b6fd339c1.tar.gz
rust-5342d40c1f49ef82ebff4c30fdad9f3b6fd339c1.zip
Auto merge of #51050 - symphorien:fstatat, r=kennytm
std::fs::DirEntry.metadata(): use fstatat instead of lstat when possible

When reading a directory with `read_dir`, querying metadata for a resulting `DirEntry` is done by building the whole path and then `lstat`ing it, which requires the kernel to resolve the whole path. Instead, one
can use the file descriptor to the enumerated directory and use `fstatat`. This makes the resolving step
unnecessary.
This PR implements using `fstatat` on linux, android and emscripten.

## Compatibility across targets
`fstatat` is POSIX.
* Linux >= 2.6.19 according to https://linux.die.net/man/2/fstatat
* android according to https://android.googlesource.com/platform/bionic/+/master/libc/libc.map.txt#392
* emscripten according to https://github.com/kripken/emscripten/blob/7f89560101843198787530731f40a65288f6f15f/system/include/libc/sys/stat.h#L76

The man page says "A similar system call exists on Solaris." but I haven't found it.

## Compatibility with old platforms
This was introduced with glibc 2.4 according to the man page. The only information I could find about the minimal version of glibc rust must support is this discussion https://internals.rust-lang.org/t/bumping-glibc-requirements-for-the-rust-toolchain/5111/10
The conclusion, if I understand correctly, is that currently rust supports glibc >= 2.3.4 but the "real" requirement is CentOS 5 with glibc 2.5. This PR would make the minimal version 2.4, so this should be fine.

## Benefit
I did the following silly benchmark:
```rust
use std::io;
use std::fs;
use std::os::linux::fs::MetadataExt;
use std::time::Instant;

fn main() -> Result<(), io::Error> {
    let mut n = 0;
    let mut size = 0;
    let start = Instant::now();
    for entry in fs::read_dir("/nix/store/.links")? {
        let entry = entry?;
        let stat = entry.metadata()?;
        size += stat.st_size();
        n+=1;
    }
    println!("{} files, size {}, time {:?}", n, size, Instant::now().duration_since(start));
    Ok(())
}
```
On warm cache, with current rust nightly:
```
1014099 files, size 76895290022, time Duration { secs: 2, nanos: 65832118 }
```
(between 2.1 and 2.9 seconds usually)
With this PR:
```
1014099 files, size 76895290022, time Duration { secs: 1, nanos: 581662953 }
```
(1.5 to 1.6 seconds usually).

approximately 40% faster :)

On cold cache there is not much to gain because path lookup (which we spare) would have been a cache hit:
Before
```
1014099 files, size 76895290022, time Duration { secs: 391, nanos: 739874992 }
```
After
```
1014099 files, size 76895290022, time Duration { secs: 388, nanos: 431567396 }
```
## Testing
The tests were run on linux `x86_64`
```
python x.py test src/tools/tidy
./x.py test src/libstd
```
and the above benchmark.
I did not test any other target.
Diffstat (limited to 'src/libstd/sys/unix')
-rw-r--r--src/libstd/sys/unix/fs.rs45
1 files changed, 33 insertions, 12 deletions
diff --git a/src/libstd/sys/unix/fs.rs b/src/libstd/sys/unix/fs.rs
index 889d21cad65..c4d94259bd6 100644
--- a/src/libstd/sys/unix/fs.rs
+++ b/src/libstd/sys/unix/fs.rs
@@ -25,6 +25,8 @@ use sys_common::{AsInner, FromInner};
 
 #[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "l4re"))]
 use libc::{stat64, fstat64, lstat64, off64_t, ftruncate64, lseek64, dirent64, readdir64_r, open64};
+#[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "android"))]
+use libc::{fstatat, dirfd};
 #[cfg(target_os = "android")]
 use libc::{stat as stat64, fstat as fstat64, lstat as lstat64, lseek64,
            dirent as dirent64, open as open64};
@@ -48,11 +50,15 @@ pub struct FileAttr {
     stat: stat64,
 }
 
-pub struct ReadDir {
+// all DirEntry's will have a reference to this struct
+struct InnerReadDir {
     dirp: Dir,
-    root: Arc<PathBuf>,
+    root: PathBuf,
 }
 
+#[derive(Clone)]
+pub struct ReadDir(Arc<InnerReadDir>);
+
 struct Dir(*mut libc::DIR);
 
 unsafe impl Send for Dir {}
@@ -60,8 +66,8 @@ unsafe impl Sync for Dir {}
 
 pub struct DirEntry {
     entry: dirent64,
-    root: Arc<PathBuf>,
-    // We need to store an owned copy of the directory name
+    dir: ReadDir,
+    // We need to store an owned copy of the entry name
     // on Solaris and Fuchsia because a) it uses a zero-length
     // array to store the name, b) its lifetime between readdir
     // calls is not guaranteed.
@@ -207,7 +213,7 @@ impl fmt::Debug for ReadDir {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         // This will only be called from std::fs::ReadDir, which will add a "ReadDir()" frame.
         // Thus the result will be e g 'ReadDir("/home")'
-        fmt::Debug::fmt(&*self.root, f)
+        fmt::Debug::fmt(&*self.0.root, f)
     }
 }
 
@@ -223,7 +229,7 @@ impl Iterator for ReadDir {
                 // is safe to use in threaded applications and it is generally preferred
                 // over the readdir_r(3C) function.
                 super::os::set_errno(0);
-                let entry_ptr = libc::readdir(self.dirp.0);
+                let entry_ptr = libc::readdir(self.0.dirp.0);
                 if entry_ptr.is_null() {
                     // NULL can mean either the end is reached or an error occurred.
                     // So we had to clear errno beforehand to check for an error now.
@@ -240,7 +246,7 @@ impl Iterator for ReadDir {
                     entry: *entry_ptr,
                     name: ::slice::from_raw_parts(name as *const u8,
                                                   namelen as usize).to_owned().into_boxed_slice(),
-                    root: self.root.clone()
+                    dir: self.clone()
                 };
                 if ret.name_bytes() != b"." && ret.name_bytes() != b".." {
                     return Some(Ok(ret))
@@ -254,11 +260,11 @@ impl Iterator for ReadDir {
         unsafe {
             let mut ret = DirEntry {
                 entry: mem::zeroed(),
-                root: self.root.clone()
+                dir: self.clone(),
             };
             let mut entry_ptr = ptr::null_mut();
             loop {
-                if readdir64_r(self.dirp.0, &mut ret.entry, &mut entry_ptr) != 0 {
+                if readdir64_r(self.0.dirp.0, &mut ret.entry, &mut entry_ptr) != 0 {
                     return Some(Err(Error::last_os_error()))
                 }
                 if entry_ptr.is_null() {
@@ -281,13 +287,27 @@ impl Drop for Dir {
 
 impl DirEntry {
     pub fn path(&self) -> PathBuf {
-        self.root.join(OsStr::from_bytes(self.name_bytes()))
+        self.dir.0.root.join(OsStr::from_bytes(self.name_bytes()))
     }
 
     pub fn file_name(&self) -> OsString {
         OsStr::from_bytes(self.name_bytes()).to_os_string()
     }
 
+    #[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "android"))]
+    pub fn metadata(&self) -> io::Result<FileAttr> {
+        let fd = cvt(unsafe {dirfd(self.dir.0.dirp.0)})?;
+        let mut stat: stat64 = unsafe { mem::zeroed() };
+        cvt(unsafe {
+            fstatat(fd,
+                    self.entry.d_name.as_ptr(),
+                    &mut stat as *mut _ as *mut _,
+                    libc::AT_SYMLINK_NOFOLLOW)
+        })?;
+        Ok(FileAttr { stat: stat })
+    }
+
+    #[cfg(not(any(target_os = "linux", target_os = "emscripten", target_os = "android")))]
     pub fn metadata(&self) -> io::Result<FileAttr> {
         lstat(&self.path())
     }
@@ -664,14 +684,15 @@ impl fmt::Debug for File {
 }
 
 pub fn readdir(p: &Path) -> io::Result<ReadDir> {
-    let root = Arc::new(p.to_path_buf());
+    let root = p.to_path_buf();
     let p = cstr(p)?;
     unsafe {
         let ptr = libc::opendir(p.as_ptr());
         if ptr.is_null() {
             Err(Error::last_os_error())
         } else {
-            Ok(ReadDir { dirp: Dir(ptr), root: root })
+            let inner = InnerReadDir { dirp: Dir(ptr), root };
+            Ok(ReadDir(Arc::new(inner)))
         }
     }
 }