From edb5214b29cd7de06dd10f673986d38e568b077c Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Thu, 1 Aug 2019 12:04:28 -0400 Subject: avoid unnecessary reservations in std::io::Take::read_to_end Previously the `read_to_end` implementation for `std::io::Take` used its own `limit` as a cap on the `reservation_size`. However, that could still result in an over-allocation like this: 1. Call `reader.take(5).read_to_end(&mut vec)`. 2. `read_to_end_with_reservation` reserves 5 bytes and calls `read`. 3. `read` writes 5 bytes. 4. `read_to_end_with_reservation` reserves 5 bytes and calls `read`. 5. `read` writes 0 bytes. 6. The read loop ends with `vec` having length 5 and capacity 10. The reservation of 5 bytes was correct for the read at step 2 but unnecessary for the read at step 4. By that second read, `Take::limit` is 0, but the `read_to_end_with_reservation` loop is still using the same `reservation_size` it started with. Solve this by having `read_to_end_with_reservation` take a closure, which lets it get a fresh `reservation_size` for each read. This is an implementation detail which doesn't affect any public API. --- src/libstd/io/mod.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'src/libstd') diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index f2b6ce6feb2..5060f368229 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -353,12 +353,17 @@ fn append_to_string(buf: &mut String, f: F) -> Result // Because we're extending the buffer with uninitialized data for trusted // readers, we need to make sure to truncate that if any of this panics. 
fn read_to_end(r: &mut R, buf: &mut Vec) -> Result { - read_to_end_with_reservation(r, buf, 32) + read_to_end_with_reservation(r, buf, |_| 32) } -fn read_to_end_with_reservation(r: &mut R, - buf: &mut Vec, - reservation_size: usize) -> Result +fn read_to_end_with_reservation( + r: &mut R, + buf: &mut Vec, + mut reservation_size: F, +) -> Result +where + R: Read + ?Sized, + F: FnMut(&R) -> usize, { let start_len = buf.len(); let mut g = Guard { len: buf.len(), buf: buf }; @@ -366,7 +371,7 @@ fn read_to_end_with_reservation(r: &mut R, loop { if g.len == g.buf.len() { unsafe { - g.buf.reserve(reservation_size); + g.buf.reserve(reservation_size(r)); let capacity = g.buf.capacity(); g.buf.set_len(capacity); r.initializer().initialize(&mut g.buf[g.len..]); @@ -2253,9 +2258,10 @@ impl Read for Take { } fn read_to_end(&mut self, buf: &mut Vec) -> Result { - let reservation_size = cmp::min(self.limit, 32) as usize; - - read_to_end_with_reservation(self, buf, reservation_size) + // Pass in a reservation_size closure that respects the current value + // of limit for each read. If we hit the read limit, this prevents the + // final zero-byte read from allocating again. + read_to_end_with_reservation(self, buf, |self_| cmp::min(self_.limit, 32) as usize) } } @@ -2378,6 +2384,7 @@ impl Iterator for Lines { #[cfg(test)] mod tests { + use crate::cmp; use crate::io::prelude::*; use super::{Cursor, SeekFrom, repeat}; use crate::io::{self, IoSlice, IoSliceMut}; @@ -2651,6 +2658,49 @@ mod tests { Ok(()) } + // A simple example reader which uses the default implementation of + // read_to_end. 
+ struct ExampleSliceReader<'a> { + slice: &'a [u8], + } + + impl<'a> Read for ExampleSliceReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let len = cmp::min(self.slice.len(), buf.len()); + buf[..len].copy_from_slice(&self.slice[..len]); + self.slice = &self.slice[len..]; + Ok(len) + } + } + + #[test] + fn test_read_to_end_capacity() -> io::Result<()> { + let input = &b"foo"[..]; + + // read_to_end() generally needs to over-allocate, both for efficiency + // and so that it can distinguish EOF. Assert that this is the case + // with this simple ExampleSliceReader struct, which uses the default + // implementation of read_to_end. Even though vec1 is allocated with + // exactly enough capacity for the read, read_to_end will allocate more + // space here. + let mut vec1 = Vec::with_capacity(input.len()); + ExampleSliceReader { slice: input }.read_to_end(&mut vec1)?; + assert_eq!(vec1.len(), input.len()); + assert!(vec1.capacity() > input.len(), "allocated more"); + + // However, std::io::Take includes an implementation of read_to_end + // that will not allocate when the limit has already been reached. In + // this case, vec2 never grows. + let mut vec2 = Vec::with_capacity(input.len()); + ExampleSliceReader { slice: input } + .take(input.len() as u64) + .read_to_end(&mut vec2)?; + assert_eq!(vec2.len(), input.len()); + assert_eq!(vec2.capacity(), input.len(), "did not allocate more"); + + Ok(()) + } + #[test] fn io_slice_mut_advance() { let mut buf1 = [1; 8]; -- cgit 1.4.1-3-g733a5 From 8e91dca596fcbab866a64f9502c476dff5bb06f6 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 19 Aug 2019 16:14:07 -0700 Subject: Consolidate sigemptyset workarounds In sys/unix/process, we work around the sigemptyset linking issues on android in two different ways. This change consolidates these workarounds, and avoids duplicating bindings from `libc`. 
--- src/libstd/sys/unix/process/process_common.rs | 54 ++++++++++++--------------- src/libstd/sys/unix/process/process_unix.rs | 13 ++----- 2 files changed, 27 insertions(+), 40 deletions(-) (limited to 'src/libstd') diff --git a/src/libstd/sys/unix/process/process_common.rs b/src/libstd/sys/unix/process/process_common.rs index 6bb20bbe087..21fca23a8fe 100644 --- a/src/libstd/sys/unix/process/process_common.rs +++ b/src/libstd/sys/unix/process/process_common.rs @@ -20,6 +20,30 @@ cfg_if::cfg_if! { } } +// Android with api less than 21 define sig* functions inline, so it is not +// available for dynamic link. Implementing sigemptyset and sigaddset allow us +// to support older Android version (independent of libc version). +// The following implementations are based on https://git.io/vSkNf +cfg_if::cfg_if! { + if #[cfg(target_os = "android")] { + pub unsafe fn sigemptyset(set: *mut libc::sigset_t) -> libc::c_int { + set.write_bytes(0u8, 1); + return 0; + } + #[allow(dead_code)] + pub unsafe fn sigaddset(set: *mut libc::sigset_t, signum: libc::c_int) -> libc::c_int { + use crate::{slice, mem}; + + let raw = slice::from_raw_parts_mut(set as *mut u8, mem::size_of::<libc::sigset_t>()); + let bit = (signum - 1) as usize; + raw[bit / 8] |= 1 << (bit % 8); + return 0; + } + } else { + pub use libc::{sigemptyset, sigaddset}; + } +} + //////////////////////////////////////////////////////////////////////////////// // Command //////////////////////////////////////////////////////////////////////////////// @@ -429,36 +453,6 @@ mod tests { } } - // Android with api less than 21 define sig* functions inline, so it is not - // available for dynamic link. Implementing sigemptyset and sigaddset allow us - // to support older Android version (independent of libc version). 
- // The following implementations are based on https://git.io/vSkNf - - #[cfg(not(target_os = "android"))] - extern { - #[cfg_attr(target_os = "netbsd", link_name = "__sigemptyset14")] - fn sigemptyset(set: *mut libc::sigset_t) -> libc::c_int; - - #[cfg_attr(target_os = "netbsd", link_name = "__sigaddset14")] - fn sigaddset(set: *mut libc::sigset_t, signum: libc::c_int) -> libc::c_int; - } - - #[cfg(target_os = "android")] - unsafe fn sigemptyset(set: *mut libc::sigset_t) -> libc::c_int { - set.write_bytes(0u8, 1); - return 0; - } - - #[cfg(target_os = "android")] - unsafe fn sigaddset(set: *mut libc::sigset_t, signum: libc::c_int) -> libc::c_int { - use crate::slice; - - let raw = slice::from_raw_parts_mut(set as *mut u8, mem::size_of::<libc::sigset_t>()); - let bit = (signum - 1) as usize; - raw[bit / 8] |= 1 << (bit % 8); - return 0; - } - // See #14232 for more information, but it appears that signal delivery to a // newly spawned process may just be raced in the macOS, so to prevent this // test from being flaky we ignore it on macOS. diff --git a/src/libstd/sys/unix/process/process_unix.rs b/src/libstd/sys/unix/process/process_unix.rs index 327d82e60cf..a9711c71b7a 100644 --- a/src/libstd/sys/unix/process/process_unix.rs +++ b/src/libstd/sys/unix/process/process_unix.rs @@ -214,14 +214,7 @@ impl Command { // need to clean things up now to avoid confusing the program // we're about to run. let mut set = MaybeUninit::<libc::sigset_t>::uninit(); - if cfg!(target_os = "android") { - // Implementing sigemptyset allow us to support older Android - // versions. 
See the comment about Android and sig* functions in - // process_common.rs - set.as_mut_ptr().write_bytes(0u8, 1); - } else { - cvt(libc::sigemptyset(set.as_mut_ptr()))?; - } + cvt(sigemptyset(set.as_mut_ptr()))?; cvt(libc::pthread_sigmask(libc::SIG_SETMASK, set.as_ptr(), ptr::null_mut()))?; let ret = sys::signal(libc::SIGPIPE, libc::SIG_DFL); @@ -363,10 +356,10 @@ impl Command { } let mut set = MaybeUninit::<libc::sigset_t>::uninit(); - cvt(libc::sigemptyset(set.as_mut_ptr()))?; + cvt(sigemptyset(set.as_mut_ptr()))?; cvt(libc::posix_spawnattr_setsigmask(attrs.0.as_mut_ptr(), set.as_ptr()))?; - cvt(libc::sigaddset(set.as_mut_ptr(), libc::SIGPIPE))?; + cvt(sigaddset(set.as_mut_ptr(), libc::SIGPIPE))?; cvt(libc::posix_spawnattr_setsigdefault(attrs.0.as_mut_ptr(), set.as_ptr()))?; -- cgit 1.4.1-3-g733a5