diff options
| author | bors <bors@rust-lang.org> | 2014-04-10 21:01:41 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2014-04-10 21:01:41 -0700 |
| commit | cea8def62068b405495ecd1810124ebc88b4f90b (patch) | |
| tree | 65d5755bd532a9213799c52572db6d6683d5e942 /src/libstd | |
| parent | 0156af156d70efd5a3c96d0c5b8fc9bec39a7ae5 (diff) | |
| parent | def90f43e2df9968cda730a2a30cb7ccb9513002 (diff) | |
| download | rust-cea8def62068b405495ecd1810124ebc88b4f90b.tar.gz rust-cea8def62068b405495ecd1810124ebc88b4f90b.zip | |
auto merge of #13440 : huonw/rust/strbuf, r=alexcrichton
libstd: Implement `StrBuf`, a new string buffer type like `Vec`, and port all code over to use it. Rebased & tests-fixed version of https://github.com/mozilla/rust/pull/13269
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/char.rs | 11 | ||||
| -rw-r--r-- | src/libstd/hash/sip.rs | 11 | ||||
| -rw-r--r-- | src/libstd/lib.rs | 1 | ||||
| -rw-r--r-- | src/libstd/path/windows.rs | 23 | ||||
| -rw-r--r-- | src/libstd/prelude.rs | 1 | ||||
| -rw-r--r-- | src/libstd/str.rs | 559 | ||||
| -rw-r--r-- | src/libstd/strbuf.rs | 374 | ||||
| -rw-r--r-- | src/libstd/vec.rs | 3 |
8 files changed, 484 insertions, 499 deletions
diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 46447e4a416..702dbcca8be 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -30,7 +30,8 @@ use iter::{Iterator, range_step}; use str::StrSlice; use unicode::{derived_property, property, general_category, decompose, conversions}; -#[cfg(test)] use str::OwnedStr; +#[cfg(test)] use str::Str; +#[cfg(test)] use strbuf::StrBuf; #[cfg(not(test))] use cmp::{Eq, Ord}; #[cfg(not(test))] use default::Default; @@ -747,9 +748,9 @@ fn test_is_digit() { #[test] fn test_escape_default() { fn string(c: char) -> ~str { - let mut result = ~""; + let mut result = StrBuf::new(); escape_default(c, |c| { result.push_char(c); }); - return result; + return result.into_owned(); } assert_eq!(string('\n'), ~"\\n"); assert_eq!(string('\r'), ~"\\r"); @@ -769,9 +770,9 @@ fn test_escape_default() { #[test] fn test_escape_unicode() { fn string(c: char) -> ~str { - let mut result = ~""; + let mut result = StrBuf::new(); escape_unicode(c, |c| { result.push_char(c); }); - return result; + return result.into_owned(); } assert_eq!(string('\x00'), ~"\\x00"); assert_eq!(string('\n'), ~"\\x0a"); diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index 6217ff0f58c..4a523e3d09e 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -291,7 +291,8 @@ mod tests { use iter::Iterator; use num::ToStrRadix; use option::{Some, None}; - use str::{Str, OwnedStr}; + use str::Str; + use strbuf::StrBuf; use slice::{Vector, ImmutableVector, OwnedVector}; use self::test::BenchHarness; @@ -387,11 +388,11 @@ mod tests { let mut state_full = SipState::new_with_keys(k0, k1); fn to_hex_str(r: &[u8, ..8]) -> ~str { - let mut s = ~""; + let mut s = StrBuf::new(); for b in r.iter() { s.push_str((*b as uint).to_str_radix(16u)); } - s + s.into_owned() } fn result_bytes(h: u64) -> ~[u8] { @@ -408,11 +409,11 @@ mod tests { fn result_str(h: u64) -> ~str { let r = result_bytes(h); - let mut s = ~""; + let mut s = StrBuf::new(); for b in r.iter() 
{ s.push_str((*b as uint).to_str_radix(16u)); } - s + s.into_owned() } while t < 64 { diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 985f8a8eb0a..9325a0ad112 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -128,6 +128,7 @@ pub mod tuple; pub mod slice; pub mod vec; pub mod str; +pub mod strbuf; pub mod ascii; diff --git a/src/libstd/path/windows.rs b/src/libstd/path/windows.rs index ca9b351210d..57dae68b842 100644 --- a/src/libstd/path/windows.rs +++ b/src/libstd/path/windows.rs @@ -20,9 +20,10 @@ use from_str::FromStr; use io::Writer; use iter::{AdditiveIterator, DoubleEndedIterator, Extendable, Rev, Iterator, Map}; use option::{Option, Some, None}; -use str; -use str::{CharSplits, OwnedStr, Str, StrVector, StrSlice}; use slice::{Vector, OwnedVector, ImmutableVector}; +use str::{CharSplits, OwnedStr, Str, StrVector, StrSlice}; +use str; +use strbuf::StrBuf; use super::{contains_nul, BytesContainer, GenericPath, GenericPathUnsafe}; /// Iterator that yields successive components of a Path as &str @@ -175,7 +176,7 @@ impl GenericPathUnsafe for Path { let filename = filename.container_as_str().unwrap(); match self.sepidx_or_prefix_len() { None if ".." == self.repr => { - let mut s = str::with_capacity(3 + filename.len()); + let mut s = StrBuf::with_capacity(3 + filename.len()); s.push_str(".."); s.push_char(SEP); s.push_str(filename); @@ -185,20 +186,20 @@ impl GenericPathUnsafe for Path { self.update_normalized(filename); } Some((_,idxa,end)) if self.repr.slice(idxa,end) == ".." 
=> { - let mut s = str::with_capacity(end + 1 + filename.len()); + let mut s = StrBuf::with_capacity(end + 1 + filename.len()); s.push_str(self.repr.slice_to(end)); s.push_char(SEP); s.push_str(filename); self.update_normalized(s); } Some((idxb,idxa,_)) if self.prefix == Some(DiskPrefix) && idxa == self.prefix_len() => { - let mut s = str::with_capacity(idxb + filename.len()); + let mut s = StrBuf::with_capacity(idxb + filename.len()); s.push_str(self.repr.slice_to(idxb)); s.push_str(filename); self.update_normalized(s); } Some((idxb,_,_)) => { - let mut s = str::with_capacity(idxb + 1 + filename.len()); + let mut s = StrBuf::with_capacity(idxb + 1 + filename.len()); s.push_str(self.repr.slice_to(idxb)); s.push_char(SEP); s.push_str(filename); @@ -252,7 +253,7 @@ impl GenericPathUnsafe for Path { let path_ = if is_verbatim(me) { Path::normalize__(path, None) } else { None }; let pathlen = path_.as_ref().map_or(path.len(), |p| p.len()); - let mut s = str::with_capacity(me.repr.len() + 1 + pathlen); + let mut s = StrBuf::with_capacity(me.repr.len() + 1 + pathlen); s.push_str(me.repr); let plen = me.prefix_len(); // if me is "C:" we don't want to add a path separator @@ -699,9 +700,9 @@ impl Path { match prefix { Some(VerbatimUNCPrefix(x, 0)) if s.len() == 8 + x => { // the server component has no trailing '\' - let mut s = s.into_owned(); + let mut s = StrBuf::from_owned_str(s.into_owned()); s.push_char(SEP); - Some(s) + Some(s.into_owned()) } _ => None } @@ -764,7 +765,7 @@ impl Path { let n = prefix_.len() + if is_abs { comps.len() } else { comps.len() - 1} + comps.iter().map(|v| v.len()).sum(); - let mut s = str::with_capacity(n); + let mut s = StrBuf::with_capacity(n); match prefix { Some(DiskPrefix) => { s.push_char(prefix_[0].to_ascii().to_upper().to_char()); @@ -795,7 +796,7 @@ impl Path { s.push_char(SEP); s.push_str(comp); } - Some(s) + Some(s.into_owned()) } } } diff --git a/src/libstd/prelude.rs b/src/libstd/prelude.rs index 0a4b32f5a89..b7bcbefa468 100644 
--- a/src/libstd/prelude.rs +++ b/src/libstd/prelude.rs @@ -59,6 +59,7 @@ pub use slice::{ImmutableEqVector, ImmutableTotalOrdVector, ImmutableCloneableVe pub use slice::{OwnedVector, OwnedCloneableVector, OwnedEqVector}; pub use slice::{MutableVector, MutableTotalOrdVector}; pub use slice::{Vector, VectorVector, CloneableVector, ImmutableVector}; +pub use strbuf::StrBuf; pub use vec::Vec; // Reexported runtime types diff --git a/src/libstd/str.rs b/src/libstd/str.rs index e24011ca021..525988c698f 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -60,13 +60,6 @@ to that string. With these guarantees, strings can easily transition between being mutable/immutable with the same benefits of having mutable strings in other languages. -```rust -let mut buf = ~"testing"; -buf.push_char(' '); -buf.push_str("123"); -assert_eq!(buf, ~"testing 123"); - ``` - # Representation Rust's string type, `str`, is a sequence of unicode codepoints encoded as a @@ -97,7 +90,6 @@ use libc; use num::Saturating; use option::{None, Option, Some}; use ptr; -use ptr::RawPtr; use from_str::FromStr; use slice; use slice::{OwnedVector, OwnedCloneableVector, ImmutableVector, MutableVector}; @@ -105,6 +97,7 @@ use slice::{Vector}; use vec::Vec; use default::Default; use raw::Repr; +use strbuf::StrBuf; /* Section: Creating a string @@ -149,9 +142,9 @@ pub fn from_byte(b: u8) -> ~str { /// Convert a char to a string pub fn from_char(ch: char) -> ~str { - let mut buf = ~""; + let mut buf = StrBuf::new(); buf.push_char(ch); - buf + buf.into_owned() } /// Convert a vector of chars to a string @@ -159,11 +152,6 @@ pub fn from_chars(chs: &[char]) -> ~str { chs.iter().map(|c| *c).collect() } -#[doc(hidden)] -pub fn push_str(lhs: &mut ~str, rhs: &str) { - lhs.push_str(rhs) -} - /// Methods for vectors of strings pub trait StrVector { /// Concatenate a vector of strings. 
@@ -180,12 +168,13 @@ impl<'a, S: Str> StrVector for &'a [S] { // `len` calculation may overflow but push_str but will check boundaries let len = self.iter().map(|s| s.as_slice().len()).sum(); - let mut result = with_capacity(len); + let mut result = StrBuf::with_capacity(len); for s in self.iter() { result.push_str(s.as_slice()) } - result + + result.into_owned() } fn connect(&self, sep: &str) -> ~str { @@ -198,7 +187,7 @@ impl<'a, S: Str> StrVector for &'a [S] { // `len` calculation may overflow but push_str but will check boundaries let len = sep.len() * (self.len() - 1) + self.iter().map(|s| s.as_slice().len()).sum(); - let mut result = with_capacity(len); + let mut result = StrBuf::with_capacity(len); let mut first = true; for s in self.iter() { @@ -209,7 +198,7 @@ impl<'a, S: Str> StrVector for &'a [S] { } result.push_str(s.as_slice()); } - result + result.into_owned() } } @@ -675,7 +664,7 @@ impl<'a> Iterator<char> for Normalizations<'a> { /// /// The original string with all occurrences of `from` replaced with `to` pub fn replace(s: &str, from: &str, to: &str) -> ~str { - let mut result = ~""; + let mut result = StrBuf::new(); let mut last_end = 0; for (start, end) in s.match_indices(from) { result.push_str(unsafe{raw::slice_bytes(s, last_end, start)}); @@ -683,7 +672,7 @@ pub fn replace(s: &str, from: &str, to: &str) -> ~str { last_end = end; } result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())}); - result + result.into_owned() } /* @@ -992,14 +981,14 @@ pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] { /// assert_eq!(str::from_utf16(v), None); /// ``` pub fn from_utf16(v: &[u16]) -> Option<~str> { - let mut s = with_capacity(v.len() / 2); + let mut s = StrBuf::with_capacity(v.len() / 2); for c in utf16_items(v) { match c { ScalarValue(c) => s.push_char(c), LoneSurrogate(_) => return None } } - Some(s) + Some(s.into_owned()) } /// Decode a UTF-16 encoded vector `v` into a string, replacing @@ -1021,15 +1010,6 @@ pub fn
from_utf16_lossy(v: &[u16]) -> ~str { utf16_items(v).map(|c| c.to_char_lossy()).collect() } -/// Allocates a new string with the specified capacity. The string returned is -/// the empty string, but has capacity for much more. -#[inline] -pub fn with_capacity(capacity: uint) -> ~str { - unsafe { - cast::transmute(slice::with_capacity::<~[u8]>(capacity)) - } -} - // https://tools.ietf.org/html/rfc3629 static UTF8_CHAR_WIDTH: [u8, ..256] = [ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -1109,10 +1089,13 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> { unsafe_get(xs, i) } } - let mut res = with_capacity(total); + + let mut res = StrBuf::with_capacity(total); if i > 0 { - unsafe { raw::push_bytes(&mut res, v.slice_to(i)) }; + unsafe { + res.push_bytes(v.slice_to(i)) + }; } // subseqidx is the index of the first byte of the subsequence we're looking at. @@ -1128,10 +1111,10 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> { macro_rules! error(() => ({ unsafe { if subseqidx != i_ { - raw::push_bytes(&mut res, v.slice(subseqidx, i_)); + res.push_bytes(v.slice(subseqidx, i_)); } subseqidx = i; - raw::push_bytes(&mut res, REPLACEMENT); + res.push_bytes(REPLACEMENT); } })) @@ -1196,9 +1179,11 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> { } } if subseqidx < total { - unsafe { raw::push_bytes(&mut res, v.slice(subseqidx, total)) }; + unsafe { + res.push_bytes(v.slice(subseqidx, total)) + }; } - Owned(res) + Owned(res.into_owned()) } /* @@ -1354,7 +1339,6 @@ pub mod raw { use libc; use ptr; use ptr::RawPtr; - use option::{Option, Some, None}; use str::{is_utf8, OwnedStr, StrSlice}; use slice; use slice::{MutableVector, ImmutableVector, OwnedVector}; @@ -1448,48 +1432,6 @@ pub mod raw { }) } - /// Appends a byte to a string. - /// The caller must preserve the valid UTF-8 property. - #[inline] - pub unsafe fn push_byte(s: &mut ~str, b: u8) { - as_owned_vec(s).push(b) - } - - /// Appends a vector of bytes to a string. 
- /// The caller must preserve the valid UTF-8 property. - #[inline] - pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) { - slice::bytes::push_bytes(as_owned_vec(s), bytes); - } - - /// Removes the last byte from a string and returns it. - /// Returns None when an empty string is passed. - /// The caller must preserve the valid UTF-8 property. - pub unsafe fn pop_byte(s: &mut ~str) -> Option<u8> { - let len = s.len(); - if len == 0u { - return None; - } else { - let b = s[len - 1u]; - s.set_len(len - 1); - return Some(b); - } - } - - /// Removes the first byte from a string and returns it. - /// Returns None when an empty string is passed. - /// The caller must preserve the valid UTF-8 property. - pub unsafe fn shift_byte(s: &mut ~str) -> Option<u8> { - let len = s.len(); - if len == 0u { - return None; - } else { - let b = s[0]; - *s = s.slice(1, len).to_owned(); - return Some(b); - } - } - /// Access the str in its vector representation. /// The caller must preserve the valid UTF-8 property when modifying. #[inline] @@ -1525,14 +1467,15 @@ pub mod traits { use iter::Iterator; use ops::Add; use option::{Some, None}; - use str::{Str, StrSlice, OwnedStr, eq_slice}; + use str::{Str, StrSlice, eq_slice}; + use strbuf::StrBuf; impl<'a> Add<&'a str,~str> for &'a str { #[inline] fn add(&self, rhs: & &'a str) -> ~str { - let mut ret = self.to_owned(); + let mut ret = StrBuf::from_owned_str(self.to_owned()); ret.push_str(*rhs); - ret + ret.into_owned() } } @@ -1605,8 +1548,20 @@ pub trait Str { /// Work with `self` as a slice. fn as_slice<'a>(&'a self) -> &'a str; - /// Convert `self` into a ~str, not making a copy if possible + /// Convert `self` into a ~str, not making a copy if possible. fn into_owned(self) -> ~str; + + /// Convert `self` into a `StrBuf`. + #[inline] + fn to_strbuf(&self) -> StrBuf { + StrBuf::from_str(self.as_slice()) + } + + /// Convert `self` into a `StrBuf`, not making a copy if possible. 
+ #[inline] + fn into_strbuf(self) -> StrBuf { + StrBuf::from_owned_str(self.into_owned()) + } } impl<'a> Str for &'a str { @@ -2519,19 +2474,19 @@ impl<'a> StrSlice<'a> for &'a str { } fn escape_default(&self) -> ~str { - let mut out = with_capacity(self.len()); + let mut out = StrBuf::with_capacity(self.len()); for c in self.chars() { c.escape_default(|c| out.push_char(c)); } - out + out.into_owned() } fn escape_unicode(&self) -> ~str { - let mut out = with_capacity(self.len()); + let mut out = StrBuf::with_capacity(self.len()); for c in self.chars() { c.escape_unicode(|c| out.push_char(c)); } - out + out.into_owned() } #[inline] @@ -2574,7 +2529,7 @@ impl<'a> StrSlice<'a> for &'a str { } fn replace(&self, from: &str, to: &str) -> ~str { - let mut result = ~""; + let mut result = StrBuf::new(); let mut last_end = 0; for (start, end) in self.match_indices(from) { result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)}); @@ -2582,7 +2537,7 @@ impl<'a> StrSlice<'a> for &'a str { last_end = end; } result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())}); - result + result.into_owned() } #[inline] @@ -2727,11 +2682,11 @@ impl<'a> StrSlice<'a> for &'a str { } fn repeat(&self, nn: uint) -> ~str { - let mut ret = with_capacity(nn * self.len()); + let mut ret = StrBuf::with_capacity(nn * self.len()); for _ in range(0, nn) { ret.push_str(*self); } - ret + ret.into_owned() } #[inline] @@ -2796,75 +2751,6 @@ impl<'a> StrSlice<'a> for &'a str { /// Methods for owned strings pub trait OwnedStr { - /// Appends a string slice to the back of a string, without overallocating. - fn push_str_no_overallocate(&mut self, rhs: &str); - - /// Appends a string slice to the back of a string - fn push_str(&mut self, rhs: &str); - - /// Appends a character to the back of a string - fn push_char(&mut self, c: char); - - /// Remove the final character from a string and return it. Return None - /// when the string is empty. 
- fn pop_char(&mut self) -> Option<char>; - - /// Remove the first character from a string and return it. Return None - /// when the string is empty. - fn shift_char(&mut self) -> Option<char>; - - /// Prepend a char to a string - fn unshift_char(&mut self, ch: char); - - /// Insert a new sub-string at the given position in a string, in O(n + m) time - /// (with n and m the lengths of the string and the substring.) - /// This fails if `position` is not at a character boundary. - fn insert(&mut self, position: uint, substring: &str); - - /// Insert a char at the given position in a string, in O(n + m) time - /// (with n and m the lengths of the string and the substring.) - /// This fails if `position` is not at a character boundary. - fn insert_char(&mut self, position: uint, ch: char); - - /// Concatenate two strings together. - fn append(self, rhs: &str) -> ~str; - - /// Reserves capacity for exactly `n` bytes in the given string. - /// - /// Assuming single-byte characters, the resulting string will be large - /// enough to hold a string of length `n`. - /// - /// If the capacity for `s` is already equal to or greater than the requested - /// capacity, then no action is taken. - /// - /// # Arguments - /// - /// * s - A string - /// * n - The number of bytes to reserve space for - fn reserve_exact(&mut self, n: uint); - - /// Reserves capacity for at least `n` bytes in the given string. - /// - /// Assuming single-byte characters, the resulting string will be large - /// enough to hold a string of length `n`. - /// - /// This function will over-allocate in order to amortize the allocation costs - /// in scenarios where the caller may need to repeatedly reserve additional - /// space. - /// - /// If the capacity for `s` is already equal to or greater than the requested - /// capacity, then no action is taken. 
- /// - /// # Arguments - /// - /// * s - A string - /// * n - The number of bytes to reserve space for - fn reserve(&mut self, n: uint); - - /// Returns the number of single-byte characters the string can hold without - /// reallocating - fn capacity(&self) -> uint; - /// Shorten a string to the specified length (which must be <= the current length) fn truncate(&mut self, len: uint); @@ -2879,120 +2765,13 @@ pub trait OwnedStr { /// modifying its buffers, so it is up to the caller to ensure that /// the string is actually the specified size. unsafe fn set_len(&mut self, new_len: uint); + + /// Pushes the given string onto this string, returning the concatenation of the two strings. + fn append(self, rhs: &str) -> ~str; } impl OwnedStr for ~str { #[inline] - fn push_str_no_overallocate(&mut self, rhs: &str) { - let new_cap = self.len() + rhs.len(); - self.reserve_exact(new_cap); - self.push_str(rhs); - } - - #[inline] - fn push_str(&mut self, rhs: &str) { - unsafe { - raw::push_bytes(self, rhs.as_bytes()); - } - } - - #[inline] - fn push_char(&mut self, c: char) { - let cur_len = self.len(); - // may use up to 4 bytes. - unsafe { - let v = raw::as_owned_vec(self); - v.reserve_additional(4); - - // Attempt to not use an intermediate buffer by just pushing bytes - // directly onto this string. 
- let write_ptr = v.as_mut_ptr().offset(cur_len as int); - let used = slice::raw::mut_buf_as_slice(write_ptr, 4, |slc| c.encode_utf8(slc)); - - v.set_len(cur_len + used); - } - } - - #[inline] - fn pop_char(&mut self) -> Option<char> { - let end = self.len(); - if end == 0u { - return None; - } else { - let CharRange {ch, next} = self.char_range_at_reverse(end); - unsafe { self.set_len(next); } - return Some(ch); - } - } - - #[inline] - fn shift_char(&mut self) -> Option<char> { - if self.is_empty() { - return None; - } else { - let CharRange {ch, next} = self.char_range_at(0u); - *self = self.slice(next, self.len()).to_owned(); - return Some(ch); - } - } - - #[inline] - fn unshift_char(&mut self, ch: char) { - // This could be more efficient. - let mut new_str = ~""; - new_str.push_char(ch); - new_str.push_str(*self); - *self = new_str; - } - - #[inline] - fn insert(&mut self, position: uint, substring: &str) { - // This could be more efficient. - let mut new_str = self.slice_to(position).to_owned(); - new_str.push_str(substring); - new_str.push_str(self.slice_from(position)); - *self = new_str; - } - - #[inline] - fn insert_char(&mut self, position: uint, ch: char) { - // This could be more efficient. 
- let mut new_str = self.slice_to(position).to_owned(); - new_str.push_char(ch); - new_str.push_str(self.slice_from(position)); - *self = new_str; - } - - #[inline] - fn append(self, rhs: &str) -> ~str { - let mut new_str = self; - new_str.push_str_no_overallocate(rhs); - new_str - } - - #[inline] - fn reserve_exact(&mut self, n: uint) { - unsafe { - raw::as_owned_vec(self).reserve_exact(n) - } - } - - #[inline] - fn reserve(&mut self, n: uint) { - unsafe { - raw::as_owned_vec(self).reserve(n) - } - } - - #[inline] - fn capacity(&self) -> uint { - unsafe { - let buf: &~[u8] = cast::transmute(self); - buf.capacity() - } - } - - #[inline] fn truncate(&mut self, len: uint) { assert!(len <= self.len()); assert!(self.is_char_boundary(len)); @@ -3008,6 +2787,13 @@ impl OwnedStr for ~str { unsafe fn set_len(&mut self, new_len: uint) { raw::as_owned_vec(self).set_len(new_len) } + + #[inline] + fn append(self, rhs: &str) -> ~str { + let mut new_str = StrBuf::from_owned_str(self); + new_str.push_str(rhs); + new_str.into_owned() + } } impl Clone for ~str { @@ -3021,21 +2807,9 @@ impl FromIterator<char> for ~str { #[inline] fn from_iter<T: Iterator<char>>(iterator: T) -> ~str { let (lower, _) = iterator.size_hint(); - let mut buf = with_capacity(lower); + let mut buf = StrBuf::with_capacity(lower); buf.extend(iterator); - buf - } -} - -impl Extendable<char> for ~str { - #[inline] - fn extend<T: Iterator<char>>(&mut self, mut iterator: T) { - let (lower, _) = iterator.size_hint(); - let reserve = lower + self.len(); - self.reserve(reserve); - for ch in iterator { - self.push_char(ch) - } + buf.into_owned() } } @@ -3054,6 +2828,7 @@ mod tests { use default::Default; use prelude::*; use str::*; + use strbuf::StrBuf; #[test] fn test_eq() { @@ -3118,92 +2893,6 @@ mod tests { } #[test] - fn test_push_str() { - let mut s = ~""; - s.push_str(""); - assert_eq!(s.slice_from(0), ""); - s.push_str("abc"); - assert_eq!(s.slice_from(0), "abc"); - s.push_str("ประเทศไทย中华Việt Nam"); - 
assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam"); - } - - #[test] - fn test_append() { - let mut s = ~""; - s = s.append(""); - assert_eq!(s.slice_from(0), ""); - s = s.append("abc"); - assert_eq!(s.slice_from(0), "abc"); - s = s.append("ประเทศไทย中华Việt Nam"); - assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam"); - } - - #[test] - fn test_pop_char() { - let mut data = ~"ประเทศไทย中华"; - let cc = data.pop_char(); - assert_eq!(~"ประเทศไทย中", data); - assert_eq!(Some('华'), cc); - } - - #[test] - fn test_pop_char_2() { - let mut data2 = ~"华"; - let cc2 = data2.pop_char(); - assert_eq!(~"", data2); - assert_eq!(Some('华'), cc2); - } - - #[test] - fn test_pop_char_empty() { - let mut data = ~""; - let cc3 = data.pop_char(); - assert_eq!(~"", data); - assert_eq!(None, cc3); - } - - #[test] - fn test_push_char() { - let mut data = ~"ประเทศไทย中"; - data.push_char('华'); - data.push_char('b'); // 1 byte - data.push_char('¢'); // 2 byte - data.push_char('€'); // 3 byte - data.push_char('𤭢'); // 4 byte - assert_eq!(~"ประเทศไทย中华b¢€𤭢", data); - } - - #[test] - fn test_shift_char() { - let mut data = ~"ประเทศไทย中"; - let cc = data.shift_char(); - assert_eq!(~"ระเทศไทย中", data); - assert_eq!(Some('ป'), cc); - } - - #[test] - fn test_unshift_char() { - let mut data = ~"ประเทศไทย中"; - data.unshift_char('华'); - assert_eq!(~"华ประเทศไทย中", data); - } - - #[test] - fn test_insert_char() { - let mut data = ~"ประเทศไทย中"; - data.insert_char(15, '华'); - assert_eq!(~"ประเท华ศไทย中", data); - } - - #[test] - fn test_insert() { - let mut data = ~"ประเทศไทย中"; - data.insert(15, "华中"); - assert_eq!(~"ประเท华中ศไทย中", data); - } - - #[test] fn test_collect() { let empty = ~""; let s: ~str = empty.chars().collect(); @@ -3214,28 +2903,6 @@ mod tests { } #[test] - fn test_extend() { - let data = ~"ประเทศไทย中"; - let mut cpy = data.clone(); - let other = "abc"; - let it = other.chars(); - cpy.extend(it); - assert_eq!(cpy, data + other); - } - - #[test] - fn test_clear() { - let mut empty = ~""; - 
empty.clear(); - assert_eq!("", empty.as_slice()); - let mut data = ~"ประเทศไทย中"; - data.clear(); - assert_eq!("", data.as_slice()); - data.push_char('华'); - assert_eq!("华", data.as_slice()); - } - - #[test] fn test_into_bytes() { let data = ~"asdf"; let buf = data.into_bytes(); @@ -3346,15 +3013,21 @@ mod tests { assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)}); fn a_million_letter_a() -> ~str { let mut i = 0; - let mut rs = ~""; - while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; } - rs + let mut rs = StrBuf::new(); + while i < 100000 { + rs.push_str("aaaaaaaaaa"); + i += 1; + } + rs.into_owned() } fn half_a_million_letter_a() -> ~str { let mut i = 0; - let mut rs = ~""; - while i < 100000 { rs.push_str("aaaaa"); i += 1; } - rs + let mut rs = StrBuf::new(); + while i < 100000 { + rs.push_str("aaaaa"); + i += 1; + } + rs.into_owned() } let letters = a_million_letter_a(); assert!(half_a_million_letter_a() == @@ -3455,18 +3128,21 @@ mod tests { fn a_million_letter_X() -> ~str { let mut i = 0; - let mut rs = ~""; + let mut rs = StrBuf::new(); while i < 100000 { - push_str(&mut rs, "华华华华华华华华华华"); + rs.push_str("华华华华华华华华华华"); i += 1; } - rs + rs.into_owned() } fn half_a_million_letter_X() -> ~str { let mut i = 0; - let mut rs = ~""; - while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; } - rs + let mut rs = StrBuf::new(); + while i < 100000 { + rs.push_str("华华华华华"); + i += 1; + } + rs.into_owned() } let letters = a_million_letter_X(); assert!(half_a_million_letter_X() == @@ -3609,29 +3285,6 @@ mod tests { } #[test] - fn test_push_byte() { - let mut s = ~"ABC"; - unsafe{raw::push_byte(&mut s, 'D' as u8)}; - assert_eq!(s, ~"ABCD"); - } - - #[test] - fn test_shift_byte() { - let mut s = ~"ABC"; - let b = unsafe{raw::shift_byte(&mut s)}; - assert_eq!(s, ~"BC"); - assert_eq!(b, Some(65u8)); - } - - #[test] - fn test_pop_byte() { - let mut s = ~"ABC"; - let b = unsafe{raw::pop_byte(&mut s)}; - assert_eq!(s, ~"AB"); - assert_eq!(b, Some(67u8)); - } - - #[test] 
fn test_is_utf8() { // deny overlong encodings assert!(!is_utf8([0xc0, 0x80])); @@ -4324,38 +3977,6 @@ mod tests { } #[test] - fn test_str_truncate() { - let mut s = ~"12345"; - s.truncate(5); - assert_eq!(s.as_slice(), "12345"); - s.truncate(3); - assert_eq!(s.as_slice(), "123"); - s.truncate(0); - assert_eq!(s.as_slice(), ""); - - let mut s = ~"12345"; - let p = s.as_ptr(); - s.truncate(3); - s.push_str("6"); - let p_ = s.as_ptr(); - assert_eq!(p_, p); - } - - #[test] - #[should_fail] - fn test_str_truncate_invalid_len() { - let mut s = ~"12345"; - s.truncate(6); - } - - #[test] - #[should_fail] - fn test_str_truncate_split_codepoint() { - let mut s = ~"\u00FC"; // ü - s.truncate(1); - } - - #[test] fn test_str_from_utf8() { let xs = bytes!("hello"); assert_eq!(from_utf8(xs), Some("hello")); @@ -4658,22 +4279,6 @@ mod bench { } #[bench] - fn bench_with_capacity(bh: &mut BenchHarness) { - bh.iter(|| { - with_capacity(100) - }); - } - - #[bench] - fn bench_push_str(bh: &mut BenchHarness) { - let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb"; - bh.iter(|| { - let mut r = ~""; - r.push_str(s); - }); - } - - #[bench] fn bench_connect(bh: &mut BenchHarness) { let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb"; let sep = "→"; diff --git a/src/libstd/strbuf.rs b/src/libstd/strbuf.rs new file mode 100644 index 00000000000..1fcc9c6465a --- /dev/null +++ b/src/libstd/strbuf.rs @@ -0,0 +1,374 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! An owned, growable string that enforces that its contents are valid UTF-8. 
+ +use c_vec::CVec; +use cast; +use char::Char; +use container::Container; +use fmt; +use io::Writer; +use iter::{Extendable, FromIterator, Iterator, range}; +use option::{None, Option, Some}; +use ptr::RawPtr; +use slice::{OwnedVector, Vector}; +use str; +use str::{OwnedStr, Str, StrSlice}; +use vec::Vec; + +/// A growable string stored as a UTF-8 encoded buffer. +#[deriving(Clone, Eq, Ord, TotalEq, TotalOrd)] +pub struct StrBuf { + vec: Vec<u8>, +} + +impl StrBuf { + /// Creates a new string buffer initalized with the empty string. + #[inline] + pub fn new() -> StrBuf { + StrBuf { + vec: Vec::new(), + } + } + + /// Creates a new string buffer with the given capacity. + #[inline] + pub fn with_capacity(capacity: uint) -> StrBuf { + StrBuf { + vec: Vec::with_capacity(capacity), + } + } + + /// Creates a new string buffer from length, capacity, and a pointer. + #[inline] + pub unsafe fn from_raw_parts(length: uint, capacity: uint, ptr: *mut u8) -> StrBuf { + StrBuf { + vec: Vec::from_raw_parts(length, capacity, ptr), + } + } + + /// Creates a new string buffer from the given string. + #[inline] + pub fn from_str(string: &str) -> StrBuf { + StrBuf { + vec: Vec::from_slice(string.as_bytes()) + } + } + + /// Creates a new string buffer from the given owned string, taking care not to copy it. + #[inline] + pub fn from_owned_str(string: ~str) -> StrBuf { + StrBuf { + vec: string.into_bytes().move_iter().collect(), + } + } + + /// Tries to create a new string buffer from the given byte + /// vector, validating that the vector is UTF-8 encoded. + #[inline] + pub fn from_utf8(vec: Vec<u8>) -> Option<StrBuf> { + if str::is_utf8(vec.as_slice()) { + Some(StrBuf { vec: vec }) + } else { + None + } + } + + /// Return the underlying byte buffer, encoded as UTF-8. + #[inline] + pub fn into_bytes(self) -> Vec<u8> { + self.vec + } + + /// Pushes the given string onto this buffer; then, returns `self` so that it can be used + /// again. 
+ #[inline] + pub fn append(mut self, second: &str) -> StrBuf { + self.push_str(second); + self + } + + /// Creates a string buffer by repeating a character `length` times. + #[inline] + pub fn from_char(length: uint, ch: char) -> StrBuf { + if length == 0 { + return StrBuf::new() + } + + let mut buf = StrBuf::new(); + buf.push_char(ch); + let size = buf.len() * length; + buf.reserve(size); + for _ in range(1, length) { + buf.push_char(ch) + } + buf + } + + /// Pushes the given string onto this string buffer. + #[inline] + pub fn push_str(&mut self, string: &str) { + self.vec.push_all(string.as_bytes()) + } + + /// Push `ch` onto the given string `count` times. + #[inline] + pub fn grow(&mut self, count: uint, ch: char) { + for _ in range(0, count) { + self.push_char(ch) + } + } + + /// Returns the number of bytes that this string buffer can hold without reallocating. + #[inline] + pub fn byte_capacity(&self) -> uint { + self.vec.capacity() + } + + /// Reserves capacity for at least `extra` additional bytes in this string buffer. + #[inline] + pub fn reserve_additional(&mut self, extra: uint) { + self.vec.reserve_additional(extra) + } + + /// Reserves capacity for at least `capacity` bytes in this string buffer. + #[inline] + pub fn reserve(&mut self, capacity: uint) { + self.vec.reserve(capacity) + } + + /// Reserves capacity for exactly `capacity` bytes in this string buffer. + #[inline] + pub fn reserve_exact(&mut self, capacity: uint) { + self.vec.reserve_exact(capacity) + } + + /// Shrinks the capacity of this string buffer to match its length. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.vec.shrink_to_fit() + } + + /// Adds the given character to the end of the string. + #[inline] + pub fn push_char(&mut self, ch: char) { + let cur_len = self.len(); + unsafe { + // This may use up to 4 bytes. + self.vec.reserve_additional(4); + + // Attempt to not use an intermediate buffer by just pushing bytes + // directly onto this string. 
+ let mut c_vector = CVec::new(self.vec.as_mut_ptr().offset(cur_len as int), 4); + let used = ch.encode_utf8(c_vector.as_mut_slice()); + self.vec.set_len(cur_len + used); + } + } + + /// Pushes the given bytes onto this string buffer. This is unsafe because it does not check + /// to ensure that the resulting string will be valid UTF-8. + #[inline] + pub unsafe fn push_bytes(&mut self, bytes: &[u8]) { + self.vec.push_all(bytes) + } + + /// Works with the underlying buffer as a byte slice. + #[inline] + pub fn as_bytes<'a>(&'a self) -> &'a [u8] { + self.vec.as_slice() + } + + /// Shorten a string to the specified length (which must be <= the current length) + #[inline] + pub fn truncate(&mut self, len: uint) { + assert!(self.as_slice().is_char_boundary(len)); + self.vec.truncate(len) + } + + /// Appends a byte to this string buffer. The caller must preserve the valid UTF-8 property. + #[inline] + pub unsafe fn push_byte(&mut self, byte: u8) { + self.push_bytes([byte]) + } + + /// Removes the last byte from the string buffer and returns it. Returns `None` if this string + /// buffer is empty. + /// + /// The caller must preserve the valid UTF-8 property. + #[inline] + pub unsafe fn pop_byte(&mut self) -> Option<u8> { + let len = self.len(); + if len == 0 { + return None + } + + let byte = self.as_slice()[len - 1]; + self.vec.set_len(len - 1); + Some(byte) + } + + /// Removes the first byte from the string buffer and returns it. Returns `None` if this string + /// buffer is empty. + /// + /// The caller must preserve the valid UTF-8 property. 
+ pub unsafe fn shift_byte(&mut self) -> Option<u8> { + let len = self.len(); + if len == 0 { + return None + } + + let byte = self.as_slice()[0]; + *self = self.as_slice().slice(1, len).into_strbuf(); + Some(byte) + } +} + +impl Container for StrBuf { + #[inline] + fn len(&self) -> uint { + self.vec.len() + } +} + +impl FromIterator<char> for StrBuf { + fn from_iter<I:Iterator<char>>(iterator: I) -> StrBuf { + let mut buf = StrBuf::new(); + buf.extend(iterator); + buf + } +} + +impl Extendable<char> for StrBuf { + fn extend<I:Iterator<char>>(&mut self, mut iterator: I) { + for ch in iterator { + self.push_char(ch) + } + } +} + +impl Str for StrBuf { + #[inline] + fn as_slice<'a>(&'a self) -> &'a str { + unsafe { + cast::transmute(self.vec.as_slice()) + } + } + + #[inline] + fn into_owned(self) -> ~str { + let StrBuf { + vec: vec + } = self; + unsafe { + cast::transmute::<~[u8],~str>(vec.move_iter().collect()) + } + } +} + +impl fmt::Show for StrBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.as_slice().fmt(f) + } +} + +impl<H:Writer> ::hash::Hash<H> for StrBuf { + #[inline] + fn hash(&self, hasher: &mut H) { + self.as_slice().hash(hasher) + } +} + +#[cfg(test)] +mod tests { + extern crate test; + use self::test::BenchHarness; + use str::{Str, StrSlice}; + use super::StrBuf; + + #[bench] + fn bench_with_capacity(bh: &mut BenchHarness) { + bh.iter(|| { + StrBuf::with_capacity(100) + }); + } + + #[bench] + fn bench_push_str(bh: &mut BenchHarness) { + let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb"; + bh.iter(|| { + let mut r = StrBuf::new(); + r.push_str(s); + }); + } + + #[test] + fn test_push_bytes() { + let mut s = StrBuf::from_str("ABC"); + unsafe { + s.push_bytes([ 'D' as u8 ]); + } + assert_eq!(s.as_slice(), "ABCD"); + } + + #[test] + fn test_push_str() { + let mut s = StrBuf::new(); + s.push_str(""); + assert_eq!(s.as_slice().slice_from(0), ""); + s.push_str("abc"); + assert_eq!(s.as_slice().slice_from(0), "abc"); + 
s.push_str("ประเทศไทย中华Việt Nam"); + assert_eq!(s.as_slice().slice_from(0), "abcประเทศไทย中华Việt Nam"); + } + + #[test] + fn test_push_char() { + let mut data = StrBuf::from_str("ประเทศไทย中"); + data.push_char('华'); + data.push_char('b'); // 1 byte + data.push_char('¢'); // 2 byte + data.push_char('€'); // 3 byte + data.push_char('𤭢'); // 4 byte + assert_eq!(data.as_slice(), "ประเทศไทย中华b¢€𤭢"); + } + + #[test] + fn test_str_truncate() { + let mut s = StrBuf::from_str("12345"); + s.truncate(5); + assert_eq!(s.as_slice(), "12345"); + s.truncate(3); + assert_eq!(s.as_slice(), "123"); + s.truncate(0); + assert_eq!(s.as_slice(), ""); + + let mut s = StrBuf::from_str("12345"); + let p = s.as_slice().as_ptr(); + s.truncate(3); + s.push_str("6"); + let p_ = s.as_slice().as_ptr(); + assert_eq!(p_, p); + } + + #[test] + #[should_fail] + fn test_str_truncate_invalid_len() { + let mut s = StrBuf::from_str("12345"); + s.truncate(6); + } + + #[test] + #[should_fail] + fn test_str_truncate_split_codepoint() { + let mut s = StrBuf::from_str("\u00FC"); // ü + s.truncate(1); + } +} diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs index 3c5cdfcf94e..da0e0d73fed 100644 --- a/src/libstd/vec.rs +++ b/src/libstd/vec.rs @@ -7,6 +7,7 @@ // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. + //! An owned, growable vector. use cast::{forget, transmute}; @@ -28,7 +29,7 @@ use ptr; use rt::global_heap::{malloc_raw, realloc_raw}; use raw::Slice; use slice::{ImmutableEqVector, ImmutableVector, Items, MutItems, MutableVector}; -use slice::{MutableTotalOrdVector, Vector}; +use slice::{MutableTotalOrdVector, OwnedVector, Vector}; /// An owned, growable vector. /// |
