diff options
| field | value | date |
|---|---|---|
| author | The8472 <git@infinite-source.de> | 2020-09-10 22:12:42 +0200 |
| committer | The8472 <git@infinite-source.de> | 2020-11-13 19:46:35 +0100 |
| commit | 46e7fbe60b53e486ff39d29c571428c8a345e925 (patch) | |
| tree | 2d9b663b4581d9875074eec29eb6ab08823e859d /library/std/src | |
| parent | 0624730d9e9e2b6de974b6f4edd1ea48ab5f240c (diff) | |
| download | rust-46e7fbe60b53e486ff39d29c571428c8a345e925.tar.gz rust-46e7fbe60b53e486ff39d29c571428c8a345e925.zip | |
reduce syscalls by inferring FD types based on source struct instead of calling stat()
also adds handling for edge-cases involving large sparse files where sendfile could fail with EOVERFLOW
Diffstat (limited to 'library/std/src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | library/std/src/io/copy.rs | 168 |
| -rw-r--r-- | library/std/src/sys/unix/fs.rs | 83 |
2 files changed, 158 insertions, 93 deletions
diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index e8cbe6a7e71..31bfdb63386 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -99,6 +99,7 @@ mod kernel_copy { use crate::os::unix::fs::FileTypeExt; use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use crate::process::{ChildStderr, ChildStdin, ChildStdout}; + use crate::sys::fs::{copy_regular_files, sendfile_splice, CopyResult, SpliceMode}; pub(super) fn copy_spec<R: Read + ?Sized, W: Write + ?Sized>( read: &mut R, @@ -108,20 +109,55 @@ mod kernel_copy { SpecCopy::copy(copier) } + /// This type represents either the inferred `FileType` of a `RawFd` based on the source + /// type from which it was extracted or the actual metadata + /// + /// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred + /// type may be wrong. enum FdMeta { + /// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata + /// because it is cheaper than probing all possible syscalls (reader side) Metadata(Metadata), Socket, Pipe, - None, + /// We don't have any metadata, e.g. because the original type was `File` which can represent + /// any `FileType` and we did not query the metadata either since it did not seem beneficial + /// (writer side) + NoneObtained, } impl FdMeta { - fn is_fifo(&self) -> bool { + fn maybe_fifo(&self) -> bool { match self { FdMeta::Metadata(meta) => meta.file_type().is_fifo(), FdMeta::Socket => false, FdMeta::Pipe => true, - FdMeta::None => false, + FdMeta::NoneObtained => true, + } + } + + fn potential_sendfile_source(&self) -> bool { + match self { + // procfs erronously shows 0 length on non-empty readable files. 
+ // and if a file is truly empty then a `read` syscall will determine that and skip the write syscall + // thus there would be benefit from attempting sendfile + FdMeta::Metadata(meta) + if meta.file_type().is_file() && meta.len() > 0 + || meta.file_type().is_block_device() => + { + true + } + _ => false, + } + } + + fn copy_file_range_candidate(&self) -> bool { + match self { + // copy_file_range will fail on empty procfs files. `read` can determine whether EOF has been reached + // without extra cost and skip the write, thus there is no benefit in attempting copy_file_range + FdMeta::Metadata(meta) if meta.is_file() && meta.len() > 0 => true, + FdMeta::NoneObtained => true, + _ => false, } } } @@ -149,66 +185,65 @@ mod kernel_copy { let r_cfg = reader.properties(); let w_cfg = writer.properties(); - // before direct operations on file descriptors ensure that all source and sink buffers are emtpy + // before direct operations on file descriptors ensure that all source and sink buffers are emtpy let mut flush = || -> crate::io::Result<u64> { let bytes = reader.drain_to(writer, u64::MAX)?; + // BufWriter buffered bytes have already been accounted for in earlier write() calls writer.flush()?; Ok(bytes) }; - match (r_cfg, w_cfg) { - ( - CopyParams(FdMeta::Metadata(reader_meta), Some(readfd)), - CopyParams(FdMeta::Metadata(writer_meta), Some(writefd)), - ) if reader_meta.is_file() && writer_meta.is_file() => { - let bytes_flushed = flush()?; - let max_write = reader.min_limit(); - let (mut reader, mut writer) = - unsafe { (fd_as_file(readfd), fd_as_file(writefd)) }; - let len = reader_meta.len(); - crate::sys::fs::copy_regular_files( - &mut reader, - &mut writer, - min(len, max_write), - ) - .map(|bytes_copied| bytes_copied + bytes_flushed) + let mut written = 0u64; + + if let (CopyParams(input_meta, Some(readfd)), CopyParams(output_meta, Some(writefd))) = + (r_cfg, w_cfg) + { + written += flush()?; + let max_write = reader.min_limit(); + + if 
input_meta.copy_file_range_candidate() && output_meta.copy_file_range_candidate() + { + let result = copy_regular_files(readfd, writefd, max_write); + + match result { + CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written), + CopyResult::Ended(err) => return err, + CopyResult::Fallback(bytes) => written += bytes, + } } - ( - CopyParams(FdMeta::Metadata(reader_meta), Some(readfd)), - CopyParams(_, Some(writefd)), - ) if reader_meta.is_file() => { - // try sendfile, most modern systems it should work with any target as long as the source is a mmapable file. - // in the rare cases where it's no supported the wrapper function will fall back to a normal copy loop - let bytes_flushed = flush()?; - let (mut reader, mut writer) = - unsafe { (fd_as_file(readfd), fd_as_file(writefd)) }; - let len = reader_meta.len(); - let max_write = reader.min_limit(); - crate::sys::fs::sendfile_splice( - crate::sys::fs::SpliceMode::Sendfile, - &mut reader, - &mut writer, - min(len, max_write), - ) - .map(|bytes_sent| bytes_sent + bytes_flushed) + + // on modern kernels sendfile can copy from any mmapable type (some but not all regular files and block devices) + // to any writable file descriptor. On older kernels the writer side can only be a socket. + // So we just try and fallback if needed. + // If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead + // fall back to the generic copy loop. 
+ if input_meta.potential_sendfile_source() { + let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write); + + match result { + CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written), + CopyResult::Ended(err) => return err, + CopyResult::Fallback(bytes) => written += bytes, + } } - (CopyParams(reader_meta, Some(readfd)), CopyParams(writer_meta, Some(writefd))) - if reader_meta.is_fifo() || writer_meta.is_fifo() => - { - // splice - let bytes_flushed = flush()?; - let max_write = reader.min_limit(); - let (mut reader, mut writer) = - unsafe { (fd_as_file(readfd), fd_as_file(writefd)) }; - crate::sys::fs::sendfile_splice( - crate::sys::fs::SpliceMode::Splice, - &mut reader, - &mut writer, - max_write, - ) - .map(|bytes_sent| bytes_sent + bytes_flushed) + + if input_meta.maybe_fifo() || output_meta.maybe_fifo() { + let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write); + + match result { + CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written), + CopyResult::Ended(err) => return err, + CopyResult::Fallback(0) => { /* use fallback */ } + CopyResult::Fallback(_) => { + unreachable!("splice should not return > 0 bytes on the fallback path") + } + } } - _ => super::generic_copy(reader, writer), + } + + match super::generic_copy(reader, writer) { + Ok(bytes) => Ok(bytes + written), + err => err, } } } @@ -235,7 +270,10 @@ mod kernel_copy { fn properties(&self) -> CopyParams; } - impl<T> CopyRead for &mut T where T: CopyRead { + impl<T> CopyRead for &mut T + where + T: CopyRead, + { fn drain_to<W: Write>(&mut self, writer: &mut W, limit: u64) -> Result<u64> { (**self).drain_to(writer, limit) } @@ -249,13 +287,15 @@ mod kernel_copy { } } - impl<T> CopyWrite for &mut T where T: CopyWrite { + impl<T> CopyWrite for &mut T + where + T: CopyWrite, + { fn properties(&self) -> CopyParams { (**self).properties() } } - impl CopyRead for File { fn properties(&self) -> CopyParams { 
CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) @@ -270,13 +310,13 @@ mod kernel_copy { impl CopyWrite for File { fn properties(&self) -> CopyParams { - CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) + CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) } } impl CopyWrite for &File { fn properties(&self) -> CopyParams { - CopyParams(fd_to_meta(*self), Some(self.as_raw_fd())) + CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) } } @@ -345,13 +385,13 @@ mod kernel_copy { impl CopyWrite for StdoutLock<'_> { fn properties(&self) -> CopyParams { - CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) + CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) } } impl CopyWrite for StderrLock<'_> { fn properties(&self) -> CopyParams { - CopyParams(fd_to_meta(self), Some(self.as_raw_fd())) + CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd())) } } @@ -411,11 +451,7 @@ mod kernel_copy { let file: ManuallyDrop<File> = ManuallyDrop::new(unsafe { File::from_raw_fd(fd) }); match file.metadata() { Ok(meta) => FdMeta::Metadata(meta), - Err(_) => FdMeta::None, + Err(_) => FdMeta::NoneObtained, } } - - unsafe fn fd_as_file(fd: RawFd) -> ManuallyDrop<File> { - ManuallyDrop::new(File::from_raw_fd(fd)) - } } diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index b106eb0a5b6..0bab95053a4 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1195,17 +1195,26 @@ pub fn copy(from: &Path, to: &Path) -> io::Result<u64> { let max_len = u64::MAX; let (mut writer, _) = open_to_and_set_permissions(to, reader_metadata)?; - copy_regular_files(&mut reader, &mut writer, max_len) + return match copy_regular_files(reader.as_raw_fd(), writer.as_raw_fd(), max_len) { + CopyResult::Ended(result) => result, + CopyResult::Fallback(written) => { + // fallback is only > 0 on EOVERFLOW, which shouldn't happen + // because the copy loop starts at a file offset 0 and countns down from `len` + assert_eq!(0, written); + 
io::copy::generic_copy(&mut reader, &mut writer) + } + }; } /// linux-specific implementation that will attempt to use copy_file_range for copy offloading /// as the name says, it only works on regular files +/// +/// Callers must handle fallback to a generic copy loop. +/// `Fallback` may indicate non-zero number of bytes already written +/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`). +/// If the initial file offset was 0 then `Fallback` will only contain `0`. #[cfg(any(target_os = "linux", target_os = "android"))] -pub(crate) fn copy_regular_files( - reader: &mut crate::fs::File, - writer: &mut crate::fs::File, - max_len: u64, -) -> io::Result<u64> { +pub(crate) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult { use crate::cmp; use crate::sync::atomic::{AtomicBool, Ordering}; @@ -1228,14 +1237,18 @@ pub(crate) fn copy_regular_files( let mut written = 0u64; while written < max_len { let copy_result = if has_copy_file_range { - let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64) as usize; + let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64); + // cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset + // this allows us to copy large chunks without hitting the limit, + // unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in + let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x8000_0000usize); let copy_result = unsafe { // We actually don't have to adjust the offsets, // because copy_file_range adjusts the file offset automatically cvt(copy_file_range( - reader.as_raw_fd(), + reader, ptr::null_mut(), - writer.as_raw_fd(), + writer, ptr::null_mut(), bytes_to_copy, 0, @@ -1260,12 +1273,14 @@ pub(crate) fn copy_regular_files( // - reading virtual files from the proc filesystem which appear to have 0 size // but are not empty. noted in coreutils to affect kernels at least up to 5.6.19. 
// - copying from an overlay filesystem in docker. reported to occur on fedora 32. - return io::copy(reader, writer); + return CopyResult::Fallback(0); } - Ok(0) => return Ok(written), // reached EOF + Ok(0) => return CopyResult::Ended(Ok(written)), // reached EOF Ok(ret) => written += ret as u64, Err(err) => { match err.raw_os_error() { + // when file offset + max_length > u64::MAX + Some(libc::EOVERFLOW) => return CopyResult::Fallback(written), Some( libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP, ) => { @@ -1276,43 +1291,55 @@ pub(crate) fn copy_regular_files( // - copy_file_range is disallowed, for example by seccomp (EPERM) // - copy_file_range cannot be used with pipes or device nodes (EINVAL) assert_eq!(written, 0); - return io::copy::generic_copy(reader, writer); + return CopyResult::Fallback(0); } - _ => return Err(err), + _ => return CopyResult::Ended(Err(err)), } } } } - Ok(written) + CopyResult::Ended(Ok(written)) } +#[derive(PartialEq)] pub(crate) enum SpliceMode { Sendfile, Splice, } +pub(crate) enum CopyResult { + Ended(io::Result<u64>), + Fallback(u64), +} + /// performs splice or sendfile between file descriptors +/// Does _not_ fall back to a generic copy loop. 
#[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) fn sendfile_splice( mode: SpliceMode, - reader: &mut crate::fs::File, - writer: &mut crate::fs::File, + reader: RawFd, + writer: RawFd, len: u64, -) -> io::Result<u64> { +) -> CopyResult { let mut written = 0u64; while written < len { let chunk_size = crate::cmp::min(len - written, 0x7ffff000_u64) as usize; let result = match mode { - SpliceMode::Sendfile => cvt(unsafe { - libc::sendfile(writer.as_raw_fd(), reader.as_raw_fd(), ptr::null_mut(), chunk_size) - }), + SpliceMode::Sendfile => { + cvt(unsafe { libc::sendfile(writer, reader, ptr::null_mut(), chunk_size) }) + } SpliceMode::Splice => cvt(unsafe { libc::splice( - reader.as_raw_fd(), + reader, ptr::null_mut(), - writer.as_raw_fd(), + writer, ptr::null_mut(), + // default pipe size is 64KiB. try to only fill/drain half of that capacity + // so that the next loop iteration won't be put to sleep. + // If reader and writer operate at the same pace they will experience fewer blocking waits. + // This is only needed for splice since sendfile stays in kernel space when it has to block. + //crate::cmp::min(32*1024, chunk_size), chunk_size, 0, ) @@ -1325,17 +1352,19 @@ pub(crate) fn sendfile_splice( Err(err) => { match err.raw_os_error() { Some(os_err) if os_err == libc::EINVAL => { - // Try fallback io::copy if splice/sendfile do not support this particular - // file descritor (EINVAL) + // splice/sendfile do not support this particular file descritor (EINVAL) assert_eq!(written, 0); - return io::copy::generic_copy(reader, writer); + return CopyResult::Fallback(0); + } + Some(os_err) if mode == SpliceMode::Sendfile && os_err == libc::EOVERFLOW => { + return CopyResult::Fallback(written); } - _ => return Err(err), + _ => return CopyResult::Ended(Err(err)), } } } } - Ok(written) + CopyResult::Ended(Ok(written)) } #[cfg(any(target_os = "macos", target_os = "ios"))] |
