about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2013-09-16 19:35:50 -0700
committerbors <bors@rust-lang.org>2013-09-16 19:35:50 -0700
commitd5e9033a0d380fefb5610c97ff1048c809251bba (patch)
tree86710ef0b5db291229c77c13263b565a412269f3 /src/libstd
parent2f96c22a21299cfe5860b0bb6fdd1af8ac500b11 (diff)
parente211888407db32fcec53f4fa9eb84acdbdf59f87 (diff)
downloadrust-d5e9033a0d380fefb5610c97ff1048c809251bba.tar.gz
rust-d5e9033a0d380fefb5610c97ff1048c809251bba.zip
auto merge of #9108 : blake2-ppc/rust/hazards-on-overflow, r=alexcrichton
Fix uint overflow bugs in std::{at_vec, vec, str}

Closes #8742

Fix issue #8742, which summarized is: unsafe code in vec and str did assume
that a reservation for `X + Y` elements always succeeded, and didn't overflow.

Introduce the method `Vec::reserve_additional(n)` to make it easy to check for
overflow in `Vec::push` and `Vec::push_all`.

In std::str, simplify and remove a lot of the unsafe code and use `push_str`
instead. With improvements to `.push_str` and the new function
`vec::bytes::push_bytes`, it looks like this change has either no or positive
impact on performance.

I believe there are many places still where `v.reserve(A + B)` still can overflow.
This by itself is not an issue unless followed by (unsafe) code that steps aside
boundary checks.
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/at_vec.rs13
-rw-r--r--src/libstd/num/uint.rs12
-rw-r--r--src/libstd/rt/io/extensions.rs2
-rw-r--r--src/libstd/str.rs183
-rw-r--r--src/libstd/vec.rs60
5 files changed, 152 insertions, 118 deletions
diff --git a/src/libstd/at_vec.rs b/src/libstd/at_vec.rs
index ce8e90e1a43..42f511f722d 100644
--- a/src/libstd/at_vec.rs
+++ b/src/libstd/at_vec.rs
@@ -230,13 +230,16 @@ pub mod raw {
     // Implementation detail. Shouldn't be public
     #[allow(missing_doc)]
     pub fn reserve_raw(ty: *TyDesc, ptr: *mut *mut Box<Vec<()>>, n: uint) {
-
+        // check for `uint` overflow
         unsafe {
-            let size_in_bytes = n * (*ty).size;
-            if size_in_bytes > (**ptr).data.alloc {
-                let total_size = size_in_bytes + sys::size_of::<Vec<()>>();
+            if n > (**ptr).data.alloc / (*ty).size {
+                let alloc = n * (*ty).size;
+                let total_size = alloc + sys::size_of::<Vec<()>>();
+                if alloc / (*ty).size != n || total_size < alloc {
+                    fail!("vector size is too large: %u", n);
+                }
                 (*ptr) = local_realloc(*ptr as *(), total_size) as *mut Box<Vec<()>>;
-                (**ptr).data.alloc = size_in_bytes;
+                (**ptr).data.alloc = alloc;
             }
         }
 
diff --git a/src/libstd/num/uint.rs b/src/libstd/num/uint.rs
index dfdd6cf72f7..38a4df270fc 100644
--- a/src/libstd/num/uint.rs
+++ b/src/libstd/num/uint.rs
@@ -101,7 +101,17 @@ pub fn next_power_of_two(n: uint) -> uint {
     let mut tmp: uint = n - 1u;
     let mut shift: uint = 1u;
     while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
-    return tmp + 1u;
+    tmp + 1u
+}
+
+/// Returns the smallest power of 2 greater than or equal to `n`
+#[inline]
+pub fn next_power_of_two_opt(n: uint) -> Option<uint> {
+    let halfbits: uint = sys::size_of::<uint>() * 4u;
+    let mut tmp: uint = n - 1u;
+    let mut shift: uint = 1u;
+    while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
+    tmp.checked_add(&1)
 }
 
 #[cfg(target_word_size = "32")]
diff --git a/src/libstd/rt/io/extensions.rs b/src/libstd/rt/io/extensions.rs
index e221f0ee94d..1c48d6e7f1e 100644
--- a/src/libstd/rt/io/extensions.rs
+++ b/src/libstd/rt/io/extensions.rs
@@ -303,7 +303,7 @@ impl<T: Reader> ReaderUtil for T {
             let start_len = buf.len();
             let mut total_read = 0;
 
-            buf.reserve_at_least(start_len + len);
+            buf.reserve_additional(len);
             vec::raw::set_len(buf, start_len + len);
 
             do (|| {
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index ccb39c605eb..bd484a5074c 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -22,8 +22,7 @@ use char;
 use char::Char;
 use clone::{Clone, DeepClone};
 use container::{Container, Mutable};
-use num::Times;
-use iter::{Iterator, FromIterator, Extendable};
+use iter::{Iterator, FromIterator, Extendable, range};
 use iter::{Filter, AdditiveIterator, Map};
 use iter::{Invert, DoubleEndedIterator, ExactSize};
 use libc;
@@ -33,7 +32,6 @@ use ptr;
 use ptr::RawPtr;
 use to_str::ToStr;
 use uint;
-use unstable::raw::{Repr, Slice};
 use vec;
 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
 use default::Default;
@@ -185,23 +183,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
     fn concat(&self) -> ~str {
         if self.is_empty() { return ~""; }
 
+        // `len` calculation may overflow but push_str but will check boundaries
         let len = self.iter().map(|s| s.as_slice().len()).sum();
 
-        let mut s = with_capacity(len);
+        let mut result = with_capacity(len);
 
-        unsafe {
-            do s.as_mut_buf |buf, _| {
-                let mut buf = buf;
-                for ss in self.iter() {
-                    do ss.as_slice().as_imm_buf |ssbuf, sslen| {
-                        ptr::copy_memory(buf, ssbuf, sslen);
-                        buf = buf.offset(sslen as int);
-                    }
-                }
-            }
-            raw::set_len(&mut s, len);
+        for s in self.iter() {
+            result.push_str(s.as_slice())
         }
-        s
+        result
     }
 
     /// Concatenate a vector of strings, placing a given separator between each.
@@ -212,34 +202,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
         if sep.is_empty() { return self.concat(); }
 
         // this is wrong without the guarantee that `self` is non-empty
+        // `len` calculation may overflow but push_str but will check boundaries
         let len = sep.len() * (self.len() - 1)
             + self.iter().map(|s| s.as_slice().len()).sum();
-        let mut s = ~"";
+        let mut result = with_capacity(len);
         let mut first = true;
 
-        s.reserve(len);
-
-        unsafe {
-            do s.as_mut_buf |buf, _| {
-                do sep.as_imm_buf |sepbuf, seplen| {
-                    let mut buf = buf;
-                    for ss in self.iter() {
-                        do ss.as_slice().as_imm_buf |ssbuf, sslen| {
-                            if first {
-                                first = false;
-                            } else {
-                                ptr::copy_memory(buf, sepbuf, seplen);
-                                buf = buf.offset(seplen as int);
-                            }
-                            ptr::copy_memory(buf, ssbuf, sslen);
-                            buf = buf.offset(sslen as int);
-                        }
-                    }
-                }
+        for s in self.iter() {
+            if first {
+                first = false;
+            } else {
+                result.push_str(sep);
             }
-            raw::set_len(&mut s, len);
+            result.push_str(s.as_slice());
         }
-        s
+        result
     }
 }
 
@@ -961,7 +938,6 @@ static TAG_CONT_U8: u8 = 128u8;
 
 /// Unsafe operations
 pub mod raw {
-    use option::Some;
     use cast;
     use libc;
     use ptr;
@@ -1064,21 +1040,22 @@ pub mod raw {
         }
     }
 
-    /// Appends a byte to a string. (Not UTF-8 safe).
+    /// Appends a byte to a string.
+    /// The caller must preserve the valid UTF-8 property.
     #[inline]
     pub unsafe fn push_byte(s: &mut ~str, b: u8) {
-        let v: &mut ~[u8] = cast::transmute(s);
-        v.push(b);
+        as_owned_vec(s).push(b)
     }
 
-    /// Appends a vector of bytes to a string. (Not UTF-8 safe).
-    unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
-        let new_len = s.len() + bytes.len();
-        s.reserve_at_least(new_len);
-        for byte in bytes.iter() { push_byte(&mut *s, *byte); }
+    /// Appends a vector of bytes to a string.
+    /// The caller must preserve the valid UTF-8 property.
+    #[inline]
+    pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
+        vec::bytes::push_bytes(as_owned_vec(s), bytes);
     }
 
-    /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
+    /// Removes the last byte from a string and returns it.
+    /// The caller must preserve the valid UTF-8 property.
     pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
         let len = s.len();
         assert!((len > 0u));
@@ -1087,7 +1064,8 @@ pub mod raw {
         return b;
     }
 
-    /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
+    /// Removes the first byte from a string and returns it.
+    /// The caller must preserve the valid UTF-8 property.
     pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
         let len = s.len();
         assert!((len > 0u));
@@ -1096,6 +1074,13 @@ pub mod raw {
         return b;
     }
 
+    /// Access the str in its vector representation.
+    /// The caller must preserve the valid UTF-8 property when modifying.
+    #[inline]
+    pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
+        cast::transmute(s)
+    }
+
     /// Sets the length of a string
     ///
     /// This will explicitly set the size of the string, without actually
@@ -1103,8 +1088,7 @@ pub mod raw {
     /// the string is actually the specified size.
     #[inline]
     pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
-        let v: &mut ~[u8] = cast::transmute(s);
-        vec::raw::set_len(v, new_len)
+        vec::raw::set_len(as_owned_vec(s), new_len)
     }
 
     /// Sets the length of a string
@@ -2061,22 +2045,11 @@ impl<'self> StrSlice<'self> for &'self str {
 
     /// Given a string, make a new string with repeated copies of it.
     fn repeat(&self, nn: uint) -> ~str {
-        do self.as_imm_buf |buf, len| {
-            let mut ret = with_capacity(nn * len);
-
-            unsafe {
-                do ret.as_mut_buf |rbuf, _len| {
-                    let mut rbuf = rbuf;
-
-                    do nn.times {
-                        ptr::copy_memory(rbuf, buf, len);
-                        rbuf = rbuf.offset(len as int);
-                    }
-                }
-                raw::set_len(&mut ret, nn * len);
-            }
-            ret
+        let mut ret = with_capacity(nn * self.len());
+        for _ in range(0, nn) {
+            ret.push_str(*self);
         }
+        ret
     }
 
     /// Retrieves the first character from a string slice and returns
@@ -2199,36 +2172,16 @@ impl OwnedStr for ~str {
     /// Appends a string slice to the back of a string, without overallocating
     #[inline]
     fn push_str_no_overallocate(&mut self, rhs: &str) {
-        unsafe {
-            let llen = self.len();
-            let rlen = rhs.len();
-            self.reserve(llen + rlen);
-            do self.as_imm_buf |lbuf, _llen| {
-                do rhs.as_imm_buf |rbuf, _rlen| {
-                    let dst = ptr::offset(lbuf, llen as int);
-                    let dst = cast::transmute_mut_unsafe(dst);
-                    ptr::copy_memory(dst, rbuf, rlen);
-                }
-            }
-            raw::set_len(self, llen + rlen);
-        }
+        let new_cap = self.len() + rhs.len();
+        self.reserve(new_cap);
+        self.push_str(rhs);
     }
 
     /// Appends a string slice to the back of a string
     #[inline]
     fn push_str(&mut self, rhs: &str) {
         unsafe {
-            let llen = self.len();
-            let rlen = rhs.len();
-            self.reserve_at_least(llen + rlen);
-            do self.as_imm_buf |lbuf, _llen| {
-                do rhs.as_imm_buf |rbuf, _rlen| {
-                    let dst = ptr::offset(lbuf, llen as int);
-                    let dst = cast::transmute_mut_unsafe(dst);
-                    ptr::copy_memory(dst, rbuf, rlen);
-                }
-            }
-            raw::set_len(self, llen + rlen);
+            raw::push_bytes(self, rhs.as_bytes());
         }
     }
 
@@ -2236,17 +2189,18 @@ impl OwnedStr for ~str {
     #[inline]
     fn push_char(&mut self, c: char) {
         let cur_len = self.len();
-        self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
-
-        // Attempt to not use an intermediate buffer by just pushing bytes
-        // directly onto this string.
+        // may use up to 4 bytes.
         unsafe {
-            let v = self.repr();
-            let len = c.encode_utf8(cast::transmute(Slice {
-                data: ((&(*v).data) as *u8).offset(cur_len as int),
-                len: 4,
-            }));
-            raw::set_len(self, cur_len + len);
+            raw::as_owned_vec(self).reserve_additional(4);
+
+            // Attempt to not use an intermediate buffer by just pushing bytes
+            // directly onto this string.
+            let used = do self.as_mut_buf |buf, _| {
+                do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
+                    c.encode_utf8(slc)
+                }
+            };
+            raw::set_len(self, cur_len + used);
         }
     }
 
@@ -2306,8 +2260,7 @@ impl OwnedStr for ~str {
     #[inline]
     fn reserve(&mut self, n: uint) {
         unsafe {
-            let v: &mut ~[u8] = cast::transmute(self);
-            (*v).reserve(n);
+            raw::as_owned_vec(self).reserve(n)
         }
     }
 
@@ -2329,7 +2282,7 @@ impl OwnedStr for ~str {
     /// * n - The number of bytes to reserve space for
     #[inline]
     fn reserve_at_least(&mut self, n: uint) {
-        self.reserve(uint::next_power_of_two(n))
+        self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n))
     }
 
     /// Returns the number of single-byte characters the string can hold without
@@ -2359,8 +2312,9 @@ impl OwnedStr for ~str {
 
     #[inline]
     fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
-        let v: &mut ~[u8] = unsafe { cast::transmute(self) };
-        v.as_mut_buf(f)
+        unsafe {
+            raw::as_owned_vec(self).as_mut_buf(f)
+        }
     }
 }
 
@@ -3912,4 +3866,23 @@ mod bench {
             with_capacity(100);
         }
     }
+
+    #[bench]
+    fn bench_push_str(bh: &mut BenchHarness) {
+        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
+        do bh.iter {
+            let mut r = ~"";
+            r.push_str(s);
+        }
+    }
+
+    #[bench]
+    fn bench_connect(bh: &mut BenchHarness) {
+        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
+        let sep = "→";
+        let v = [s, s, s, s, s, s, s, s, s, s];
+        do bh.iter {
+            assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
+        }
+    }
 }
diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs
index 47c3a079614..9fc0eaf72b1 100644
--- a/src/libstd/vec.rs
+++ b/src/libstd/vec.rs
@@ -1245,6 +1245,7 @@ pub trait OwnedVector<T> {
 
     fn reserve(&mut self, n: uint);
     fn reserve_at_least(&mut self, n: uint);
+    fn reserve_additional(&mut self, n: uint);
     fn capacity(&self) -> uint;
     fn shrink_to_fit(&mut self);
 
@@ -1300,6 +1301,11 @@ impl<T> OwnedVector<T> for ~[T] {
      * # Arguments
      *
      * * n - The number of elements to reserve space for
+     *
+     * # Failure
+     *
+     * This method always succeeds in reserving space for `n` elements, or it does
+     * not return.
      */
     fn reserve(&mut self, n: uint) {
         // Only make the (slow) call into the runtime if we have to
@@ -1340,7 +1346,26 @@ impl<T> OwnedVector<T> for ~[T] {
      */
     #[inline]
     fn reserve_at_least(&mut self, n: uint) {
-        self.reserve(uint::next_power_of_two(n));
+        self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n));
+    }
+
+    /**
+     * Reserves capacity for at least `n` additional elements in the given vector.
+     *
+     * # Failure
+     *
+     * Fails if the new required capacity overflows uint.
+     *
+     * May also fail if `reserve` fails.
+     */
+    #[inline]
+    fn reserve_additional(&mut self, n: uint) {
+        if self.capacity() - self.len() < n {
+            match self.len().checked_add(&n) {
+                None => fail!("vec::reserve_additional: `uint` overflow"),
+                Some(new_cap) => self.reserve_at_least(new_cap)
+            }
+        }
     }
 
     /// Returns the number of elements the vector can hold without reallocating.
@@ -1376,8 +1401,7 @@ impl<T> OwnedVector<T> for ~[T] {
                 let repr: **Box<Vec<()>> = cast::transmute(&mut *self);
                 let fill = (**repr).data.fill;
                 if (**repr).data.alloc <= fill {
-                    let new_len = self.len() + 1;
-                    self.reserve_at_least(new_len);
+                    self.reserve_additional(1);
                 }
 
                 push_fast(self, t);
@@ -1385,8 +1409,7 @@ impl<T> OwnedVector<T> for ~[T] {
                 let repr: **Vec<()> = cast::transmute(&mut *self);
                 let fill = (**repr).fill;
                 if (**repr).alloc <= fill {
-                    let new_len = self.len() + 1;
-                    self.reserve_at_least(new_len);
+                    self.reserve_additional(1);
                 }
 
                 push_fast(self, t);
@@ -1432,7 +1455,7 @@ impl<T> OwnedVector<T> for ~[T] {
         let self_len = self.len();
         let rhs_len = rhs.len();
         let new_len = self_len + rhs_len;
-        self.reserve_at_least(new_len);
+        self.reserve_additional(rhs.len());
         unsafe { // Note: infallible.
             let self_p = vec::raw::to_mut_ptr(*self);
             let rhs_p = vec::raw::to_ptr(rhs);
@@ -2221,6 +2244,23 @@ pub mod bytes {
         // Bound checks are done at vec::raw::copy_memory.
         unsafe { vec::raw::copy_memory(dst, src, count) }
     }
+
+    /**
+     * Allocate space in `dst` and append the data in `src`.
+     */
+    #[inline]
+    pub fn push_bytes(dst: &mut ~[u8], src: &[u8]) {
+        let old_len = dst.len();
+        dst.reserve_additional(src.len());
+        unsafe {
+            do dst.as_mut_buf |p_dst, len_dst| {
+                do src.as_imm_buf |p_src, len_src| {
+                    ptr::copy_memory(p_dst.offset(len_dst as int), p_src, len_src)
+                }
+            }
+            vec::raw::set_len(dst, old_len + src.len());
+        }
+    }
 }
 
 impl<A: Clone> Clone for ~[A] {
@@ -3620,6 +3660,14 @@ mod tests {
     }
 
     #[test]
+    #[should_fail]
+    fn test_overflow_does_not_cause_segfault_managed() {
+        let mut v = ~[@1];
+        v.reserve(-1);
+        v.push(@2);
+    }
+
+    #[test]
     fn test_mut_split() {
         let mut values = [1u8,2,3,4,5];
         {