diff options
| author | blake2-ppc <blake2-ppc> | 2013-08-19 15:34:48 +0200 |
|---|---|---|
| committer | blake2-ppc <blake2-ppc> | 2013-08-19 16:11:45 +0200 |
| commit | 8fe83028870ac6ac48e99a38d2992bedc26ec0d7 (patch) | |
| tree | 821bd65530ba050829b9e7da1abd79bef75d7f4a /src | |
| parent | 30ab96b27229000d3754e7dee64fc431b5105150 (diff) | |
| download | rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.tar.gz rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.zip | |
std::str: Use iterators instead of while loops for CharSplitIterator
Embed an iterator in the CharSplitIterator struct. The kind of iterator stored (byte offsets or char offsets) carries the same information as the former bool `only_ascii`, so the two are combined into a single enum.
Diffstat (limited to 'src')
| -rw-r--r-- | src/libstd/str.rs | 78 |
1 files changed, 45 insertions, 33 deletions
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 7fde1c9f03c..d8f723d9c78 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -23,7 +23,7 @@ use clone::Clone;
 use container::{Container, Mutable};
 use iter::Times;
 use iterator::{Iterator, FromIterator, Extendable};
-use iterator::{Filter, AdditiveIterator, Map};
+use iterator::{Filter, AdditiveIterator, Map, Enumerate};
 use iterator::{Invert, DoubleEndedIterator};
 use libc;
 use num::{Saturating, Zero};
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
 /// Use with the `std::iterator` module.
 pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
 
+/// An iterator over byte index and either &u8 or char
+#[deriving(Clone)]
+enum OffsetIterator<'self> {
+    // use ByteIterator here when it can be cloned
+    ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
+    CharOffset(CharOffsetIterator<'self>),
+}
+
 /// An iterator over the substrings of a string, separated by `sep`.
 #[deriving(Clone)]
 pub struct CharSplitIterator<'self,Sep> {
+    priv iter: OffsetIterator<'self>,
     priv string: &'self str,
     priv position: uint,
     priv sep: Sep,
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
     /// Whether an empty string at the end is allowed
     priv allow_trailing_empty: bool,
     priv finished: bool,
-    priv only_ascii: bool
 }
 
 /// An iterator over the words of a string, separated by an sequence of whitespace
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
     fn next(&mut self) -> Option<&'self str> {
         if self.finished { return None }
 
-        let l = self.string.len();
         let start = self.position;
-
-        if self.only_ascii {
-            // this gives a *huge* speed up for splitting on ASCII
-            // characters (e.g. '\n' or ' ')
-            while self.position < l && self.count > 0 {
-                let byte = self.string[self.position];
-
-                if self.sep.matches(byte as char) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
-                    self.position += 1;
-                    self.count -= 1;
-                    return Some(slice);
-                }
-                self.position += 1;
-            }
-        } else {
-            while self.position < l && self.count > 0 {
-                let CharRange {ch, next} = self.string.char_range_at(self.position);
-
-                if self.sep.matches(ch) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
-                    self.position = next;
-                    self.count -= 1;
-                    return Some(slice);
-                }
-                self.position = next;
+        let len = self.string.len();
+
+        if self.count > 0 {
+            match self.iter {
+                // this gives a *huge* speed up for splitting on ASCII
+                // characters (e.g. '\n' or ' ')
+                ByteOffset(ref mut iter) =>
+                    for (idx, &byte) in *iter {
+                        if self.sep.matches(byte as char) {
+                            self.position = idx + 1;
+                            self.count -= 1;
+                            return Some(unsafe {
+                                raw::slice_bytes(self.string, start, idx)
+                            })
+                        }
+                    },
+                CharOffset(ref mut iter) =>
+                    for (idx, ch) in *iter {
+                        if self.sep.matches(ch) {
+                            // skip over the separator
+                            self.position = self.string.char_range_at(idx).next;
+                            self.count -= 1;
+                            return Some(unsafe {
+                                raw::slice_bytes(self.string, start, idx)
+                            })
+                        }
+                    },
             }
         }
         self.finished = true;
-        if self.allow_trailing_empty || start < l {
-            Some(unsafe { raw::slice_bytes(self.string, start, l) })
+        if self.allow_trailing_empty || start < len {
+            Some(unsafe { raw::slice_bytes(self.string, start, len) })
         } else {
             None
         }
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
     #[inline]
     fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
         -> CharSplitIterator<'self, Sep> {
-        let only_ascii = sep.only_ascii();
+        let iter = if sep.only_ascii() {
+            ByteOffset(self.as_bytes().iter().enumerate())
+        } else {
+            CharOffset(self.char_offset_iter())
+        };
         CharSplitIterator {
+            iter: iter,
             string: *self,
             position: 0,
             sep: sep,
             count: count,
             allow_trailing_empty: allow_trailing_empty,
             finished: false,
-            only_ascii: only_ascii
         }
     }
 
